Btrfs: Record dirty pages tree-log pages in an extent_io tree

This is the same way the transaction code makes sure that all the
other tree blocks are safely on disk.  There's an extent_io tree
for each root, and any blocks allocated to the tree logs are
recorded in that tree.

At tree-log sync, the extent_io tree is walked to flush down the
dirty pages and wait for them.

The main benefit is less time spent walking the tree log and skipping
clean pages, and getting sequential IO down to the drive.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2ed6918..eb65fd8 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -668,6 +668,8 @@
 	struct btrfs_key root_key;
 	struct btrfs_fs_info *fs_info;
 	struct inode *inode;
+	struct extent_io_tree dirty_log_pages;
+
 	struct kobject root_kobj;
 	struct completion kobj_unregister;
 	struct mutex objectid_mutex;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5edb7f8..57fbf10 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -777,6 +777,8 @@
 	spin_lock_init(&root->list_lock);
 	mutex_init(&root->objectid_mutex);
 	mutex_init(&root->log_mutex);
+	extent_io_tree_init(&root->dirty_log_pages,
+			     fs_info->btree_inode->i_mapping, GFP_NOFS);
 
 	btrfs_leaf_ref_tree_init(&root->ref_tree_struct);
 	root->ref_tree = &root->ref_tree_struct;
@@ -819,11 +821,23 @@
 			     struct btrfs_fs_info *fs_info)
 {
 	struct extent_buffer *eb;
+	struct btrfs_root *log_root_tree = fs_info->log_root_tree;
+	u64 start = 0;
+	u64 end = 0;
 	int ret;
 
-	if (!fs_info->log_root_tree)
+	if (!log_root_tree)
 		return 0;
 
+	while(1) {
+		ret = find_first_extent_bit(&log_root_tree->dirty_log_pages,
+				    0, &start, &end, EXTENT_DIRTY);
+		if (ret)
+			break;
+
+		clear_extent_dirty(&log_root_tree->dirty_log_pages,
+				   start, end, GFP_NOFS);
+	}
 	eb = fs_info->log_root_tree->node;
 
 	WARN_ON(btrfs_header_level(eb) != 0);
@@ -1412,7 +1426,6 @@
 	memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
 	       sizeof(struct btrfs_key));
 	insert_inode_hash(fs_info->btree_inode);
-	mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
 
 	mutex_init(&fs_info->trans_mutex);
 	mutex_init(&fs_info->tree_log_mutex);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index c479d71..c0bb6b9 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2392,8 +2392,13 @@
 	btrfs_tree_lock(buf);
 	clean_tree_block(trans, root, buf);
 	btrfs_set_buffer_uptodate(buf);
-	set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
+	if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
+		set_extent_dirty(&root->dirty_log_pages, buf->start,
 			 buf->start + buf->len - 1, GFP_NOFS);
+	} else {
+		set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
+			 buf->start + buf->len - 1, GFP_NOFS);
+	}
 	trans->blocks_used++;
 	return buf;
 }
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 61a377b..151b00d 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -302,23 +302,18 @@
 }
 
 
-int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
-				     struct btrfs_root *root)
+int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
+					struct extent_io_tree *dirty_pages)
 {
 	int ret;
 	int err = 0;
 	int werr = 0;
-	struct extent_io_tree *dirty_pages;
 	struct page *page;
 	struct inode *btree_inode = root->fs_info->btree_inode;
 	u64 start = 0;
 	u64 end;
 	unsigned long index;
 
-	if (!trans || !trans->transaction) {
-		return filemap_write_and_wait(btree_inode->i_mapping);
-	}
-	dirty_pages = &trans->transaction->dirty_pages;
 	while(1) {
 		ret = find_first_extent_bit(dirty_pages, start, &start, &end,
 					    EXTENT_DIRTY);
@@ -385,6 +380,18 @@
 	return werr;
 }
 
+int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
+				     struct btrfs_root *root)
+{
+	if (!trans || !trans->transaction) {
+		struct inode *btree_inode;
+		btree_inode = root->fs_info->btree_inode;
+		return filemap_write_and_wait(btree_inode->i_mapping);
+	}
+	return btrfs_write_and_wait_marked_extents(root,
+					   &trans->transaction->dirty_pages);
+}
+
 static int update_cowonly_root(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root)
 {
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index cc63650..eef2cb7 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -99,4 +99,6 @@
 				   struct btrfs_root *root);
 void btrfs_throttle(struct btrfs_root *root);
 int btrfs_record_root_in_trans(struct btrfs_root *root);
+int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
+					struct extent_io_tree *dirty_pages);
 #endif
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index ae96451..bfa7108 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1954,10 +1954,6 @@
 	int ret;
 	unsigned long batch;
 	struct btrfs_root *log = root->log_root;
-	struct walk_control wc = {
-		.write = 1,
-		.process_func = process_one_buffer
-	};
 
 	mutex_lock(&log->fs_info->tree_log_mutex);
 	if (atomic_read(&log->fs_info->tree_log_commit)) {
@@ -1985,18 +1981,11 @@
 		if (batch == log->fs_info->tree_log_batch)
 			break;
 	}
-	ret = walk_log_tree(trans, log, &wc);
+
+	ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages);
 	BUG_ON(ret);
-
-	ret = walk_log_tree(trans, log->fs_info->log_root_tree, &wc);
-	BUG_ON(ret);
-
-	wc.wait = 1;
-
-	ret = walk_log_tree(trans, log, &wc);
-	BUG_ON(ret);
-
-	ret = walk_log_tree(trans, log->fs_info->log_root_tree, &wc);
+	ret = btrfs_write_and_wait_marked_extents(root->fs_info->log_root_tree,
+			       &root->fs_info->log_root_tree->dirty_log_pages);
 	BUG_ON(ret);
 
 	btrfs_set_super_log_root(&root->fs_info->super_for_commit,
@@ -2025,6 +2014,8 @@
 	int ret;
 	struct btrfs_root *log;
 	struct key;
+	u64 start;
+	u64 end;
 	struct walk_control wc = {
 		.free = 1,
 		.process_func = process_one_buffer
@@ -2037,6 +2028,16 @@
 	ret = walk_log_tree(trans, log, &wc);
 	BUG_ON(ret);
 
+	while(1) {
+		ret = find_first_extent_bit(&log->dirty_log_pages,
+				    0, &start, &end, EXTENT_DIRTY);
+		if (ret)
+			break;
+
+		clear_extent_dirty(&log->dirty_log_pages,
+				   start, end, GFP_NOFS);
+	}
+
 	log = root->log_root;
 	ret = btrfs_del_root(trans, root->fs_info->log_root_tree,
 			     &log->root_key);