Btrfs: early work to file_write in big extents

Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index e2fc6f0..6b76fd9 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1249,6 +1249,60 @@
 	return ret;
 }
 
+/*
+ * Grow the item at path->slots[0] in the leaf at path->nodes[0] by data_size
+ * bytes.  The item offsets of that slot and every later slot are lowered by
+ * data_size and the data between the leaf's data end and the end of the
+ * item is moved down to match, so the new bytes sit at the end of the item.
+ *
+ * The leaf must already have data_size bytes free; this BUG()s otherwise.
+ * Always returns 0.
+ */
+int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root
+		      *root, struct btrfs_path *path, u32 data_size)
+{
+	int slot = path->slots[0];
+	struct buffer_head *leaf_buf = path->nodes[0];
+	struct btrfs_leaf *leaf = btrfs_buffer_leaf(leaf_buf);
+	u32 nritems = btrfs_header_nritems(&leaf->header);
+	unsigned int data_end = leaf_data_end(root, leaf);
+	unsigned int old_data;
+	unsigned int old_size;
+	int free_space = btrfs_leaf_free_space(root, leaf);
+	int i;
+
+	/* rule out a negative count before the unsigned size comparison */
+	if (free_space < 0 || (u32)free_space < data_size)
+		BUG();
+	/* validate the slot before it is used to index leaf->items */
+	BUG_ON(slot < 0);
+	BUG_ON(slot >= nritems);
+	old_data = btrfs_item_end(leaf->items + slot);
+
+	/*
+	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
+	 */
+	/* first correct the data pointers */
+	for (i = slot; i < nritems; i++) {
+		u32 ioff = btrfs_item_offset(leaf->items + i);
+		btrfs_set_item_offset(leaf->items + i,
+				      ioff - data_size);
+	}
+	/* shift the data */
+	btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) +
+		      data_end - data_size, btrfs_leaf_data(leaf) +
+		      data_end, old_data - data_end);
+	old_size = btrfs_item_size(leaf->items + slot);
+	btrfs_set_item_size(leaf->items + slot, old_size + data_size);
+	btrfs_mark_buffer_dirty(leaf_buf);
+
+	/* growing the item must not have overfilled the leaf */
+	if (btrfs_leaf_free_space(root, leaf) < 0)
+		BUG();
+	check_leaf(root, path, 0);
+	return 0;
+}
+
 /*
  * Given a key and some data, insert an item into the tree.
  * This does all the path init required, making room in the tree if needed.
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index ef3583c..796f19d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -227,6 +227,7 @@
 } __attribute__ ((__packed__));
 
 struct btrfs_csum_item {
+	__le64 extent_offset;	/* key offset of the file's EXTENT_DATA item */
 	u8 csum[BTRFS_CSUM_SIZE];
 } __attribute__ ((__packed__));
 
@@ -924,6 +925,17 @@
 	e->num_blocks = cpu_to_le64(val);
 }
 
+static inline u64 btrfs_csum_extent_offset(struct btrfs_csum_item *c)
+{
+	return le64_to_cpu(c->extent_offset); /* field is a __le64 */
+}
+
+static inline void btrfs_set_csum_extent_offset(struct btrfs_csum_item *c,
+						u64 val)
+{
+	c->extent_offset = cpu_to_le64(val); /* field is a __le64 */
+}
+
 static inline u16 btrfs_device_pathlen(struct btrfs_device_item *d)
 {
 	return le16_to_cpu(d->pathlen);
@@ -1002,6 +1014,8 @@
 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
 			       btrfs_root *root);
 /* ctree.c */
+int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root
+		      *root, struct btrfs_path *path, u32 data_size);
 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 		      *root, struct btrfs_key *key, struct btrfs_path *p, int
 		      ins_len, int cow);
@@ -1071,6 +1085,7 @@
 int btrfs_csum_file_block(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root,
 			  u64 objectid, u64 offset,
+			  u64 extent_offset,
 			  char *data, size_t len);
 int btrfs_csum_verify_file_block(struct btrfs_root *root,
 				 u64 objectid, u64 offset,
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index d9fd7f4..93d42d6 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -3,6 +3,9 @@
 #include "disk-io.h"
 #include "transaction.h"
 
+/* most btrfs_csum_items one item may hold before a new item is started */
+#define MAX_CSUM_ITEMS(r) (((BTRFS_LEAF_DATA_SIZE(r) - \
+	sizeof(struct btrfs_item)) / sizeof(struct btrfs_csum_item)) - 1)
 int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root,
 			       u64 objectid, u64 offset,
@@ -43,6 +46,54 @@
 	return 0;
 }
 
+/*
+ * Find the csum for the block at 'offset' in file 'objectid'.  An exact key
+ * match yields the first csum of that item; otherwise the previous item is
+ * used if it is a csum item of the same file with enough entries.  Returns
+ * ERR_PTR(-EIO) when no csum is found, or the btrfs_search_slot() error.
+ */
+static struct btrfs_csum_item *__lookup_csum_item(struct btrfs_root *root,
+						  struct btrfs_path *path,
+						  u64 objectid, u64 offset)
+{
+	struct btrfs_csum_item *csum;
+	struct btrfs_key key;
+	struct btrfs_key prev_key;
+	struct btrfs_leaf *leaf;
+	u64 index = 0;
+	int ret;
+
+	key.objectid = objectid;
+	key.offset = offset;
+	key.flags = 0;
+	btrfs_set_key_type(&key, BTRFS_CSUM_ITEM_KEY);
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0)
+		return ERR_PTR(ret);
+	leaf = btrfs_buffer_leaf(path->nodes[0]);
+	if (ret == 0)
+		goto found;
+
+	/* no exact match, see if the previous item covers this block */
+	if (path->slots[0] == 0)
+		return ERR_PTR(-EIO);
+	path->slots[0]--;
+	btrfs_disk_key_to_cpu(&prev_key, &leaf->items[path->slots[0]].key);
+	if (btrfs_key_type(&prev_key) != BTRFS_CSUM_ITEM_KEY ||
+	    prev_key.objectid != objectid)
+		return ERR_PTR(-EIO);
+	index = (offset - prev_key.offset) >>
+		root->fs_info->sb->s_blocksize_bits;
+	if (index >= btrfs_item_size(leaf->items + path->slots[0]) /
+	    sizeof(struct btrfs_csum_item))
+		return ERR_PTR(-EIO);
+found:
+	csum = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
+	csum += index;
+	return csum;
+}
+
+
 int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root,
 			     struct btrfs_path *path, u64 objectid,
@@ -52,11 +103,16 @@
 	struct btrfs_key file_key;
 	int ins_len = mod < 0 ? -1 : 0;
 	int cow = mod != 0;
+	struct btrfs_csum_item *csum_item;
 
+	csum_item = __lookup_csum_item(root, path, objectid, offset);
+	if (IS_ERR(csum_item))
+		return PTR_ERR(csum_item);
 	file_key.objectid = objectid;
-	file_key.offset = offset;
+	file_key.offset = btrfs_csum_extent_offset(csum_item);
 	file_key.flags = 0;
 	btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY);
+	btrfs_release_path(root, path);
 	ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow);
 	return ret;
 }
@@ -64,12 +120,16 @@
 int btrfs_csum_file_block(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root,
 			  u64 objectid, u64 offset,
+			  u64 extent_offset,
 			  char *data, size_t len)
 {
 	int ret;
 	struct btrfs_key file_key;
+	struct btrfs_key found_key;
 	struct btrfs_path *path;
 	struct btrfs_csum_item *item;
+	struct btrfs_leaf *leaf;
+	u64 csum_offset;
 
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
@@ -78,14 +138,50 @@
 	file_key.offset = offset;
 	file_key.flags = 0;
 	btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY);
+	ret = btrfs_search_slot(trans, root, &file_key, path,
+				sizeof(struct btrfs_csum_item), 1);
+	if (ret < 0)
+		goto fail;
+	if (ret == 0) {
+		csum_offset = 0;
+		goto csum;
+	}
+	if (path->slots[0] == 0) {
+		btrfs_release_path(root, path);
+		goto insert;
+	}
+	path->slots[0]--;
+	leaf = btrfs_buffer_leaf(path->nodes[0]);
+	btrfs_disk_key_to_cpu(&found_key, &leaf->items[path->slots[0]].key);
+	csum_offset = (offset - found_key.offset) >>
+			root->fs_info->sb->s_blocksize_bits;
+	if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY ||
+	    found_key.objectid != objectid ||
+	    csum_offset >= MAX_CSUM_ITEMS(root)) {
+		btrfs_release_path(root, path);
+		goto insert;
+	}
+	if (csum_offset >= btrfs_item_size(leaf->items + path->slots[0]) /
+	    sizeof(struct btrfs_csum_item)) {
+		ret = btrfs_extend_item(trans, root, path,
+					sizeof(struct btrfs_csum_item));
+		BUG_ON(ret);
+		goto csum;
+	}
+
+insert:
+	csum_offset = 0;
 	ret = btrfs_insert_empty_item(trans, root, path, &file_key,
-				      BTRFS_CSUM_SIZE);
+				      sizeof(struct btrfs_csum_item));
 	if (ret != 0 && ret != -EEXIST)
 		goto fail;
+csum:
 	item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
 			      struct btrfs_csum_item);
 	ret = 0;
+	item += csum_offset;
 	ret = btrfs_csum_data(root, data, len, item->csum);
+	btrfs_set_csum_extent_offset(item, extent_offset);
 	btrfs_mark_buffer_dirty(path->nodes[0]);
 fail:
 	btrfs_release_path(root, path);
@@ -111,12 +207,13 @@
 	file_key.flags = 0;
 	btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY);
 	mutex_lock(&root->fs_info->fs_mutex);
-	ret = btrfs_search_slot(NULL, root, &file_key, path, 0, 0);
-	if (ret)
+
+	item = __lookup_csum_item(root, path, objectid, offset);
+	if (IS_ERR(item)) {
+		ret = PTR_ERR(item);
 		goto fail;
-	item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
-			      struct btrfs_csum_item);
-	ret = 0;
+	}
+
 	ret = btrfs_csum_data(root, data, len, result);
 	WARN_ON(ret);
 	if (memcmp(result, item->csum, BTRFS_CSUM_SIZE))
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 8dcf600..ec68999 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1027,8 +1027,10 @@
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
 	btrfs_init_path(path);
-	if (create)
+	if (create) {
 		trans = btrfs_start_transaction(root, 1);
+		WARN_ON(1);
+	}
 
 	ret = btrfs_lookup_file_extent(trans, root, path,
 				       inode->i_ino,
@@ -1055,9 +1057,8 @@
 	/* exact match found, use it, FIXME, deal with extents
 	 * other than the page size
 	 */
-	if (ret == 0) {
+	if (0 && ret == 0) {
 		err = 0;
-		BUG_ON(btrfs_file_extent_disk_num_blocks(item) != 1);
 		if (create &&
 		    btrfs_file_extent_generation(item) != trans->transid) {
 			struct btrfs_key ins;
@@ -1072,7 +1073,6 @@
 			blocknr = ins.objectid;
 
 		}
-		map_bh(result, inode->i_sb, blocknr);
 		btrfs_map_bh_to_logical(root, result, blocknr);
 		goto out;
 	}
@@ -1231,6 +1231,7 @@
 				   struct file *file,
 				   struct page **pages,
 				   size_t num_pages,
+				   u64 extent_offset,
 				   loff_t pos,
 				   size_t write_bytes)
 {
@@ -1250,6 +1251,7 @@
 		trans = btrfs_start_transaction(root, 1);
 		btrfs_csum_file_block(trans, root, inode->i_ino,
 				      pages[i]->index << PAGE_CACHE_SHIFT,
+				      extent_offset,
 				      kmap(pages[i]), PAGE_CACHE_SIZE);
 		kunmap(pages[i]);
 		SetPageChecked(pages[i]);
@@ -1279,7 +1281,8 @@
 			 loff_t pos,
 			 unsigned long first_index,
 			 unsigned long last_index,
-			 size_t write_bytes)
+			 size_t write_bytes,
+			 u64 alloc_extent_start)
 {
 	int i;
 	unsigned long index = pos >> PAGE_CACHE_SHIFT;
@@ -1288,6 +1291,8 @@
 	int err = 0;
 	int ret;
 	int this_write;
+	struct buffer_head *bh;
+	struct buffer_head *head;
 	loff_t isize = i_size_read(inode);
 
 	memset(pages, 0, num_pages * sizeof(struct page *));
@@ -1307,14 +1312,20 @@
 			BUG_ON(ret);
 			lock_page(pages[i]);
 		}
-		ret = nobh_prepare_write(pages[i], offset,
-					 offset + this_write,
-					 btrfs_get_block);
+		create_empty_buffers(pages[i], root->fs_info->sb->s_blocksize,
+				     (1 << BH_Uptodate));
+		head = page_buffers(pages[i]);
+		bh = head;
+		do {
+			err = btrfs_map_bh_to_logical(root, bh,
+						      alloc_extent_start);
+			BUG_ON(err);
+			if (err)
+				goto failed_truncate;
+			bh = bh->b_this_page;
+			alloc_extent_start++;
+		} while (bh != head);
 		pos += this_write;
-		if (ret) {
-			err = ret;
-			goto failed_truncate;
-		}
 		WARN_ON(this_write > write_bytes);
 		write_bytes -= this_write;
 	}
@@ -1343,11 +1354,23 @@
 	struct page *pages[1];
 	unsigned long first_index;
 	unsigned long last_index;
+	u64 start_pos;
+	u64 num_blocks;
+	u64 alloc_extent_start;
+	u64 orig_extent_start;
+	struct btrfs_trans_handle *trans;
 
 	if (file->f_flags & O_DIRECT)
 		return -EINVAL;
 	pos = *ppos;
 
+	start_pos = pos & ~(root->blocksize - 1);
+	/* FIXME */
+	if (start_pos != pos)
+		return -EINVAL;
+	num_blocks = (count + pos - start_pos + root->blocksize - 1) >>
+			inode->i_blkbits;
+
 	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
 	current->backing_dev_info = inode->i_mapping->backing_dev_info;
 	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
@@ -1362,20 +1385,41 @@
 	mutex_lock(&inode->i_mutex);
 	first_index = pos >> PAGE_CACHE_SHIFT;
 	last_index = (pos + count) >> PAGE_CACHE_SHIFT;
+
+	mutex_lock(&root->fs_info->fs_mutex);
+	trans = btrfs_start_transaction(root, 1);
+	if (!trans) {
+		err = -ENOMEM;
+		goto out_unlock;
+	}
+	ret = btrfs_alloc_file_extent(trans, root, inode->i_ino,
+				      start_pos, num_blocks, 1,
+				      &alloc_extent_start);
+	BUG_ON(ret);
+
+	orig_extent_start = start_pos;
+	ret = btrfs_end_transaction(trans, root);
+	BUG_ON(ret);
+	mutex_unlock(&root->fs_info->fs_mutex);
+
 	while(count > 0) {
 		size_t offset = pos & (PAGE_CACHE_SIZE - 1);
 		size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset);
 		size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
 					PAGE_CACHE_SHIFT;
 		ret = prepare_pages(NULL, root, file, pages, num_pages,
-				    pos, first_index, last_index, write_bytes);
+				    pos, first_index, last_index,
+				    write_bytes, alloc_extent_start);
 		BUG_ON(ret);
+		/* FIXME blocks != pagesize */
+		alloc_extent_start += num_pages;
 		ret = btrfs_copy_from_user(pos, num_pages,
 					   write_bytes, pages, buf);
 		BUG_ON(ret);
 
 		ret = dirty_and_release_pages(NULL, root, file, pages,
-					      num_pages, pos, write_bytes);
+					      num_pages, orig_extent_start,
+					      pos, write_bytes);
 		BUG_ON(ret);
 		btrfs_drop_pages(pages, num_pages);
 
@@ -1387,6 +1431,7 @@
 		balance_dirty_pages_ratelimited(inode->i_mapping);
 		cond_resched();
 	}
+out_unlock:
 	mutex_unlock(&inode->i_mutex);
 out:
 	*ppos = pos;
@@ -1806,8 +1851,6 @@
 				&new_root_item);
 	BUG_ON(ret);
 
-printk("adding snapshot name %.*s root %Lu %Lu %u\n", namelen, name, key.objectid, key.offset, key.flags);
-
 	/*
 	 * insert the directory item
 	 */