Btrfs: early reference counting

Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 3c5f4c2..1118986 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -34,6 +34,37 @@
 	memset(p, 0, sizeof(*p));
 }
 
+int btrfs_cow_block(struct ctree_root *root, /* tree used for the allocation and the root-node check */
+		    struct tree_buffer *buf, /* block the caller is about to modify */
+		    struct tree_buffer *parent, /* only dereferenced when a copy is made and buf is not root->node */
+		    int parent_slot, /* index in parent->node.blockptrs that is redirected to the copy */
+		    struct tree_buffer **cow_ret) /* out: buf itself, or the new copy */
+{ /* Returns buf via *cow_ret if buf->dirty is linked, else a fresh copy wired into the tree; always returns 0. */
+	struct tree_buffer *cow;
+
+	if (!list_empty(&buf->dirty)) { /* buf is on a dirty list -- presumably already safe to modify, so no copy */
+		*cow_ret = buf;
+		return 0;
+	}
+	cow = alloc_free_block(root); /* NOTE(review): result is dereferenced below without a NULL check */
+	memcpy(&cow->node, &buf->node, sizeof(buf->node));
+	cow->node.header.blocknr = cow->blocknr; /* memcpy copied buf's header; restore the copy's own block number */
+	*cow_ret = cow;
+	if (buf == root->node) {
+		root->node = cow;
+		cow->count++; /* extra count for the pointer now stored in root->node */
+		tree_block_release(root, buf); /* presumably drops the reference root->node held on buf */
+	} else {
+		parent->node.blockptrs[parent_slot] = cow->blocknr; /* repoint the parent at the copy */
+		BUG_ON(list_empty(&parent->dirty)); /* parent was just modified, so it must already be on a dirty list */
+	}
+	if (0 && root != root->extent_root && !is_leaf(cow->node.header.flags)) { /* leading "0 &&" means this branch never runs */
+		btrfs_inc_ref(root, cow);
+	}
+	tree_block_release(root, buf); /* presumably drops the caller's reference on buf; callers continue with *cow_ret */
+	return 0;
+}
+
 /*
  * The leaf data grows from end-to-front in the node.
  * this returns the address of the start of the last item,
@@ -263,6 +294,8 @@
 
 	/* first, try to make some room in the middle buffer */
 	if (left_buf) {
+		btrfs_cow_block(root, left_buf, parent_buf,
+				pslot - 1, &left_buf);
 		left = &left_buf->node;
 		orig_slot += left->header.nritems;
 		wret = push_node_left(root, left_buf, mid_buf);
@@ -274,6 +307,8 @@
 	 * then try to empty the right most buffer into the middle
 	 */
 	if (right_buf) {
+		btrfs_cow_block(root, right_buf, parent_buf,
+				pslot + 1, &right_buf);
 		right = &right_buf->node;
 		wret = push_node_left(root, mid_buf, right_buf);
 		if (wret < 0)
@@ -293,9 +328,7 @@
 		} else {
 			memcpy(parent->keys + pslot + 1, right->keys,
 				sizeof(struct key));
-			wret = dirty_tree_block(root, parent_buf);
-			if (wret)
-				ret = wret;
+			BUG_ON(list_empty(&parent_buf->dirty));
 		}
 	}
 	if (mid->header.nritems == 1) {
@@ -330,9 +363,7 @@
 	} else {
 		/* update the parent key to reflect our changes */
 		memcpy(parent->keys + pslot, mid->keys, sizeof(struct key));
-		wret = dirty_tree_block(root, parent_buf);
-		if (wret)
-			ret = wret;
+		BUG_ON(list_empty(&parent_buf->dirty));
 	}
 
 	/* update the path */
@@ -375,9 +406,10 @@
  * possible)
  */
 int search_slot(struct ctree_root *root, struct key *key,
-		struct ctree_path *p, int ins_len)
+		struct ctree_path *p, int ins_len, int cow)
 {
 	struct tree_buffer *b;
+	struct tree_buffer *cow_buf;
 	struct node *c;
 	int slot;
 	int ret;
@@ -387,8 +419,15 @@
 	b = root->node;
 	b->count++;
 	while (b) {
+		level = node_level(b->node.header.flags);
+		if (cow) {
+			int wret;
+			wret = btrfs_cow_block(root, b, p->nodes[level + 1],
+					       p->slots[level + 1], &cow_buf);
+			b = cow_buf;
+		}
+		BUG_ON(!cow && ins_len);
 		c = &b->node;
-		level = node_level(c->header.flags);
 		p->nodes[level] = b;
 		ret = check_block(p, level);
 		if (ret)
@@ -453,7 +492,6 @@
 {
 	int i;
 	int ret = 0;
-	int wret;
 	for (i = level; i < MAX_LEVEL; i++) {
 		struct node *t;
 		int tslot = path->slots[i];
@@ -461,9 +499,7 @@
 			break;
 		t = &path->nodes[i]->node;
 		memcpy(t->keys + tslot, key, sizeof(*key));
-		wret = dirty_tree_block(root, path->nodes[i]);
-		if (wret)
-			ret = wret;
+		BUG_ON(list_empty(&path->nodes[i]->dirty));
 		if (tslot != 0)
 			break;
 	}
@@ -486,7 +522,6 @@
 	int src_nritems;
 	int dst_nritems;
 	int ret = 0;
-	int wret;
 
 	src_nritems = src->header.nritems;
 	dst_nritems = dst->header.nritems;
@@ -511,13 +546,8 @@
 	src->header.nritems -= push_items;
 	dst->header.nritems += push_items;
 
-	wret = dirty_tree_block(root, src_buf);
-	if (wret < 0)
-		ret = wret;
-
-	wret = dirty_tree_block(root, dst_buf);
-	if (wret < 0)
-		ret = wret;
+	BUG_ON(list_empty(&src_buf->dirty));
+	BUG_ON(list_empty(&dst_buf->dirty));
 	return ret;
 }
 
@@ -541,7 +571,6 @@
 	int src_nritems;
 	int dst_nritems;
 	int ret = 0;
-	int wret;
 
 	src_nritems = src->header.nritems;
 	dst_nritems = dst->header.nritems;
@@ -569,13 +598,8 @@
 	src->header.nritems -= push_items;
 	dst->header.nritems += push_items;
 
-	wret = dirty_tree_block(root, src_buf);
-	if (wret < 0)
-		ret = wret;
-
-	wret = dirty_tree_block(root, dst_buf);
-	if (wret < 0)
-		ret = wret;
+	BUG_ON(list_empty(&src_buf->dirty));
+	BUG_ON(list_empty(&dst_buf->dirty));
 	return ret;
 }
 
@@ -615,7 +639,6 @@
 	tree_block_release(root, root->node);
 	root->node = t;
 	t->count++;
-	dirty_tree_block(root, t);
 	path->nodes[level] = t;
 	path->slots[level] = 0;
 	return 0;
@@ -655,7 +678,7 @@
 	lower->header.nritems++;
 	if (lower->keys[1].objectid == 0)
 			BUG();
-	dirty_tree_block(root, path->nodes[level]);
+	BUG_ON(list_empty(&path->nodes[level]->dirty));
 	return 0;
 }
 
@@ -701,12 +724,7 @@
 	c->header.nritems = mid;
 	ret = 0;
 
-	wret = dirty_tree_block(root, t);
-	if (wret)
-		ret = wret;
-	wret = dirty_tree_block(root, split_buffer);
-	if (wret)
-		ret = wret;
+	BUG_ON(list_empty(&t->dirty));
 	wret = insert_ptr(root, path, split->keys, split_buffer->blocknr,
 			  path->slots[level + 1] + 1, level + 1);
 	if (wret)
@@ -778,6 +796,15 @@
 		tree_block_release(root, right_buf);
 		return 1;
 	}
+	/* cow and double check */
+	btrfs_cow_block(root, right_buf, upper, slot + 1, &right_buf);
+	right = &right_buf->leaf;
+	free_space = leaf_free_space(right);
+	if (free_space < data_size + sizeof(struct item)) {
+		tree_block_release(root, right_buf);
+		return 1;
+	}
+
 	for (i = left->header.nritems - 1; i >= 0; i--) {
 		item = left->items + i;
 		if (path->slots[0] == i)
@@ -818,11 +845,12 @@
 	}
 	left->header.nritems -= push_items;
 
-	dirty_tree_block(root, left_buf);
-	dirty_tree_block(root, right_buf);
+	BUG_ON(list_empty(&left_buf->dirty));
+	BUG_ON(list_empty(&right_buf->dirty));
 	memcpy(upper->node.keys + slot + 1,
 		&right->items[0].key, sizeof(struct key));
-	dirty_tree_block(root, upper);
+	BUG_ON(list_empty(&upper->dirty));
+
 	/* then fixup the leaf pointer in the path */
 	if (path->slots[0] >= left->header.nritems) {
 		path->slots[0] -= left->header.nritems;
@@ -869,6 +897,16 @@
 		tree_block_release(root, t);
 		return 1;
 	}
+
+	/* cow and double check */
+	btrfs_cow_block(root, t, path->nodes[1], slot - 1, &t);
+	left = &t->leaf;
+	free_space = leaf_free_space(left);
+	if (free_space < data_size + sizeof(struct item)) {
+		tree_block_release(root, t);
+		return 1;
+	}
+
 	for (i = 0; i < right->header.nritems; i++) {
 		item = right->items + i;
 		if (path->slots[0] == i)
@@ -912,12 +950,8 @@
 		push_space = right->items[i].offset;
 	}
 
-	wret = dirty_tree_block(root, t);
-	if (wret)
-		ret = wret;
-	wret = dirty_tree_block(root, right_buf);
-	if (wret)
-		ret = wret;
+	BUG_ON(list_empty(&t->dirty));
+	BUG_ON(list_empty(&right_buf->dirty));
 
 	wret = fixup_low_keys(root, path, &right->items[0].key, 1);
 	if (wret)
@@ -968,6 +1002,7 @@
 		if (wret < 0)
 			return wret;
 	}
+
 	l_buf = path->nodes[0];
 	l = &l_buf->leaf;
 
@@ -1022,13 +1057,8 @@
 			  right_buffer->blocknr, path->slots[1] + 1, 1);
 	if (wret)
 		ret = wret;
-	wret = dirty_tree_block(root, right_buffer);
-	if (wret)
-		ret = wret;
-	wret = dirty_tree_block(root, l_buf);
-	if (wret)
-		ret = wret;
-
+	BUG_ON(list_empty(&right_buffer->dirty));
+	BUG_ON(list_empty(&l_buf->dirty));
 	BUG_ON(path->slots[0] != slot);
 	if (mid <= slot) {
 		tree_block_release(root, path->nodes[0]);
@@ -1049,7 +1079,6 @@
 			  void *data, int data_size)
 {
 	int ret = 0;
-	int wret;
 	int slot;
 	int slot_orig;
 	struct leaf *leaf;
@@ -1062,7 +1091,7 @@
 	if (!root->node)
 		BUG();
 	init_path(&path);
-	ret = search_slot(root, key, &path, data_size);
+	ret = search_slot(root, key, &path, data_size, 1);
 	if (ret == 0) {
 		release_path(root, &path);
 		return -EEXIST;
@@ -1114,10 +1143,7 @@
 	if (slot == 0)
 		ret = fixup_low_keys(root, &path, key, 1);
 
-	wret = dirty_tree_block(root, leaf_buf);
-	if (wret)
-		ret = wret;
-
+	BUG_ON(list_empty(&leaf_buf->dirty));
 	if (leaf_free_space(leaf) < 0)
 		BUG();
 	check_leaf(&path, 0);
@@ -1162,9 +1188,7 @@
 		if (wret)
 			ret = wret;
 	}
-	wret = dirty_tree_block(root, parent);
-	if (wret)
-		ret = wret;
+	BUG_ON(list_empty(&parent->dirty));
 	return ret;
 }
 
@@ -1205,7 +1229,7 @@
 	if (leaf->header.nritems == 0) {
 		if (leaf_buf == root->node) {
 			leaf->header.flags = node_level(0);
-			dirty_tree_block(root, leaf_buf);
+			BUG_ON(list_empty(&leaf_buf->dirty));
 		} else {
 			clean_tree_block(root, leaf_buf);
 			wret = del_ptr(root, path, 1, path->slots[1]);
@@ -1223,9 +1247,7 @@
 			if (wret)
 				ret = wret;
 		}
-		wret = dirty_tree_block(root, leaf_buf);
-		if (wret)
-			ret = wret;
+		BUG_ON(list_empty(&leaf_buf->dirty));
 
 		/* delete the leaf if it is mostly empty */
 		if (used < LEAF_DATA_SIZE / 3) {
@@ -1304,3 +1326,4 @@
 	return 0;
 }
 
+
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 6b4dabd..9fe8ba6 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -142,8 +142,9 @@
 };
 
 struct tree_buffer *alloc_free_block(struct ctree_root *root);
+int btrfs_inc_ref(struct ctree_root *root, struct tree_buffer *buf);
 int free_extent(struct ctree_root *root, u64 blocknr, u64 num_blocks);
-int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p, int ins_len);
+int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p, int ins_len, int cow);
 void release_path(struct ctree_root *root, struct ctree_path *p);
 void init_path(struct ctree_path *p);
 int del_item(struct ctree_root *root, struct ctree_path *path);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b1a8149..0e1c31e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -260,6 +260,8 @@
 	if (buf->count < 0)
 		BUG();
 	if (buf->count == 0) {
+		BUG_ON(!list_empty(&buf->cache));
+		BUG_ON(!list_empty(&buf->dirty));
 		if (!radix_tree_lookup(&root->cache_radix, buf->blocknr))
 			BUG();
 		radix_tree_delete(&root->cache_radix, buf->blocknr);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index fb21444..25d9cd1 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -15,6 +15,39 @@
  */
 #define CTREE_EXTENT_PENDING 0
 
+static int inc_block_ref(struct ctree_root *root, u64 blocknr) /* increments refs on the extent item found for blocknr; always returns 0 */
+{
+	struct ctree_path path;
+	int ret;
+	struct key key;
+	struct leaf *l;
+	struct extent_item *item;
+	init_path(&path);
+	key.objectid = blocknr; /* lookup key is (objectid = blocknr, flags = 0, offset = 1) */
+	key.flags = 0;
+	key.offset = 1;
+	ret = search_slot(root->extent_root, &key, &path, 0, 1); /* ins_len 0, cow 1: the leaf is modified below */
+	BUG_ON(ret != 0); /* any nonzero search result is treated as fatal */
+	l = &path.nodes[0]->leaf;
+	item = (struct extent_item *)(l->data +
+				      l->items[path.slots[0]].offset); /* item payload located via the slot's recorded offset into l->data */
+	item->refs++;
+	BUG_ON(list_empty(&path.nodes[0]->dirty)); /* the leaf just modified must be on a dirty list */
+	release_path(root->extent_root, &path);
+	return 0;
+}
+
+int btrfs_inc_ref(struct ctree_root *root, struct tree_buffer *buf) /* calls inc_block_ref() for each block pointer in buf; always returns 0 */
+{
+	u64 blocknr;
+	int i;
+	for (i = 0; i < buf->node.header.nritems; i++) {
+		blocknr = buf->node.blockptrs[i]; /* reads buf through its node layout -- presumably callers never pass a leaf */
+		inc_block_ref(root, blocknr); /* return value ignored; inc_block_ref() as written only returns 0 */
+	}
+	return 0;
+}
+
 /*
  * find all the blocks marked as pending in the radix tree and remove
  * them from the extent map
@@ -39,7 +72,7 @@
 			key.flags = 0;
 			key.offset = 1;
 			init_path(&path);
-			ret = search_slot(extent_root, &key, &path, -1);
+			ret = search_slot(extent_root, &key, &path, -1, 1);
 			if (ret) {
 				print_tree(extent_root, extent_root->node);
 				printf("unable to find %Lu\n", key.objectid);
@@ -83,7 +116,7 @@
 		return 0;
 	}
 	init_path(&path);
-	ret = search_slot(extent_root, &key, &path, -1);
+	ret = search_slot(extent_root, &key, &path, -1, 1);
 	if (ret) {
 		print_tree(extent_root, extent_root->node);
 		printf("failed to find %Lu\n", key.objectid);
@@ -124,7 +157,7 @@
 	ins->offset = 0;
 	ins->flags = 0;
 	start_found = 0;
-	ret = search_slot(root, ins, &path, 0);
+	ret = search_slot(root, ins, &path, 0, 0);
 	if (ret < 0)
 		goto error;
 
@@ -221,6 +254,8 @@
 			ret = insert_item(extent_root, &key, &item,
 					  sizeof(item));
 			if (ret) {
+				printf("%Lu already in tree\n", key.objectid);
+				print_tree(extent_root, extent_root->node);
 				BUG();
 				// FIXME undo it and return sane
 				return ret;
@@ -228,6 +263,7 @@
 			radix_tree_tag_clear(&extent_root->cache_radix,
 					     gang[i]->blocknr,
 					     CTREE_EXTENT_PENDING);
+			printf("%Lu is not pending\n", gang[i]->blocknr);
 			tree_block_release(extent_root, gang[i]);
 		}
 	}
@@ -266,15 +302,18 @@
 		if (pending_ret)
 			return pending_ret;
 		*buf = find_tree_block(root, ins->objectid);
+		dirty_tree_block(root, *buf);
 		return 0;
 	}
 	/* we're allocating an extent for the extent tree, don't recurse */
 	BUG_ON(ins->offset != 1);
 	*buf = find_tree_block(root, ins->objectid);
 	BUG_ON(!*buf);
+	printf("%Lu is pending\n", ins->objectid);
 	radix_tree_tag_set(&root->cache_radix, ins->objectid,
 			   CTREE_EXTENT_PENDING);
 	(*buf)->count++;
+	dirty_tree_block(root, *buf);
 	return 0;
 
 }
diff --git a/fs/btrfs/quick-test.c b/fs/btrfs/quick-test.c
index dbd00c3..8255f79 100644
--- a/fs/btrfs/quick-test.c
+++ b/fs/btrfs/quick-test.c
@@ -19,7 +19,7 @@
 	int i;
 	int num;
 	int ret;
-	int run_size = 100000;
+	int run_size = 1024;
 	int max_key =  100000000;
 	int tree_size = 0;
 	struct ctree_path path;
@@ -57,7 +57,7 @@
 		init_path(&path);
 		if (i % 10000 == 0)
 			fprintf(stderr, "search %d:%d\n", num, i);
-		ret = search_slot(root, &ins, &path, 0);
+		ret = search_slot(root, &ins, &path, 0, 0);
 		if (ret) {
 			print_tree(root, root->node);
 			printf("unable to find %d\n", num);
@@ -79,7 +79,7 @@
 		num = next_key(i, max_key);
 		ins.objectid = num;
 		init_path(&path);
-		ret = search_slot(root, &ins, &path, -1);
+		ret = search_slot(root, &ins, &path, -1, 1);
 		if (!ret) {
 			if (i % 10000 == 0)
 				fprintf(stderr, "del %d:%d\n", num, i);
@@ -117,7 +117,7 @@
 		init_path(&path);
 		if (i % 10000 == 0)
 			fprintf(stderr, "search %d:%d\n", num, i);
-		ret = search_slot(root, &ins, &path, 0);
+		ret = search_slot(root, &ins, &path, 0, 0);
 		if (ret) {
 			print_tree(root, root->node);
 			printf("unable to find %d\n", num);
@@ -131,7 +131,7 @@
 		int slot;
 		ins.objectid = (u64)-1;
 		init_path(&path);
-		ret = search_slot(root, &ins, &path, -1);
+		ret = search_slot(root, &ins, &path, -1, 1);
 		if (ret == 0)
 			BUG();
 
diff --git a/fs/btrfs/random-test.c b/fs/btrfs/random-test.c
index 53245c50..dcc852a 100644
--- a/fs/btrfs/random-test.c
+++ b/fs/btrfs/random-test.c
@@ -93,7 +93,7 @@
 	ret = setup_key(radix, &key, 1);
 	if (ret < 0)
 		return 0;
-	ret = search_slot(root, &key, &path, -1);
+	ret = search_slot(root, &key, &path, -1, 1);
 	if (ret)
 		goto error;
 	ret = del_item(root, &path);
@@ -118,7 +118,7 @@
 	ret = setup_key(radix, &key, 1);
 	if (ret < 0)
 		return 0;
-	ret = search_slot(root, &key, &path, 0);
+	ret = search_slot(root, &key, &path, 0, 1);
 	release_path(root, &path);
 	if (ret)
 		goto error;
@@ -137,7 +137,7 @@
 	ret = setup_key(radix, &key, 0);
 	if (ret < 0)
 		return ret;
-	ret = search_slot(root, &key, &path, 0);
+	ret = search_slot(root, &key, &path, 0, 0);
 	release_path(root, &path);
 	if (ret <= 0)
 		goto error;
@@ -163,7 +163,7 @@
 	key.objectid = (unsigned long)-1;
 	while(nr-- >= 0) {
 		init_path(&path);
-		ret = search_slot(root, &key, &path, -1);
+		ret = search_slot(root, &key, &path, -1, 1);
 		if (ret < 0) {
 			release_path(root, &path);
 			return ret;
@@ -216,7 +216,7 @@
 				return ret;
 			}
 		}
-		if (i % 10000 == 0) {
+		if (i && i % 10000 == 0) {
 			printf("bigfill %d\n", i);
 		}
 		if (!keep_running)
@@ -263,7 +263,7 @@
 	key.objectid = (unsigned long)-1;
 	while(1) {
 		init_path(&path);
-		ret = search_slot(root, &key, &path, 0);
+		ret = search_slot(root, &key, &path, 0, 0);
 		if (ret < 0) {
 			release_path(root, &path);
 			return ret;