Btrfs: early extent mapping support
Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 6336021..df065dd 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -4,8 +4,8 @@
.c.o:
$(CC) $(CFLAGS) -c $<
-ctree: ctree.o disk-io.h ctree.h disk-io.o radix-tree.o radix-tree.h
- gcc $(CFLAGS) -o ctree ctree.o disk-io.o radix-tree.o
+ctree: ctree.o disk-io.h ctree.h disk-io.o radix-tree.o radix-tree.h mkfs.o
+ gcc $(CFLAGS) -o ctree ctree.o disk-io.o radix-tree.o mkfs.o
clean:
rm ctree *.o
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 6b64f49..2177744 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -5,6 +5,8 @@
#include "ctree.h"
#include "disk-io.h"
+static int refill_alloc_extent(struct ctree_root *root);
+
static inline void init_path(struct ctree_path *p)
{
memset(p, 0, sizeof(*p));
@@ -29,7 +31,7 @@
{
unsigned int nr = leaf->header.nritems;
if (nr == 0)
- return ARRAY_SIZE(leaf->data);
+ return sizeof(leaf->data);
return leaf->items[nr-1].offset;
}
@@ -421,7 +423,7 @@
* due to splitting. Once we've done all the splitting required
* do the inserts based on the data in the bal array.
*/
- memset(bal, 0, ARRAY_SIZE(bal));
+ memset(bal, 0, sizeof(bal));
while(t && t->node.header.nritems == NODEPTRS_PER_BLOCK) {
c = &t->node;
if (push_node_left(root, path,
@@ -756,6 +758,7 @@
if (leaf_free_space(leaf) < 0)
BUG();
release_path(root, &path);
+ refill_alloc_extent(root);
return 0;
}
@@ -884,6 +887,135 @@
return 0;
}
+/* Move path to slot 0 of the next leaf; returns 0, or -1 if there is none. */
+int next_leaf(struct ctree_root *root, struct ctree_path *path)
+{
+	int slot;
+	int level = 1;
+	struct tree_buffer *c;
+	struct tree_buffer *next;
+
+	/* climb until an ancestor has a block pointer right of the path */
+	while(1) {
+		if (level >= MAX_LEVEL || !path->nodes[level])
+			return -1;
+		slot = path->slots[level] + 1;
+		c = path->nodes[level];
+		if (slot >= c->node.header.nritems) {
+			level++;
+			continue;
+		}
+		next = read_tree_block(root, c->node.blockptrs[slot]);
+		break;
+	}
+	path->slots[level] = slot;
+	while(1) {	/* walk back down the leftmost edge of that subtree */
+		level--;
+		c = path->nodes[level];
+		tree_block_release(root, c);
+		path->nodes[level] = next;
+		path->slots[level] = 0;
+		if (!level)
+			break;
+		next = read_tree_block(root, next->node.blockptrs[0]);
+	}
+	return 0;
+}
+
+/*
+ * Walk root's keys from search_start looking for a gap larger than num_blocks
+ * (or the space past the last item) and insert an extent item there. The range
+ * comes back in ins (objectid = first block, offset = length); returns insert_item()'s result.
+ */
+int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start,
+		 u64 search_end, u64 owner, struct key *ins)
+{
+	struct ctree_path path;
+	struct key *key;
+	int ret;
+	u64 hole_size;
+	int slot;
+	u64 last_block = 0;
+	int start_found = 0;
+	struct leaf *l;
+	struct extent_item extent_item;
+
+	init_path(&path);
+	ins->objectid = search_start;
+	ins->offset = 0;
+	ins->flags = 0;
+
+	ret = search_slot(root, ins, &path);
+	while (1) {
+		l = &path.nodes[0]->leaf;
+		slot = path.slots[0];
+		if (!l) {
+			// FIXME allocate root
+		}
+		if (slot >= l->header.nritems) {
+			ret = next_leaf(root, &path);
+			if (ret == 0)
+				continue;
+			/* end of tree. FIXME -ENOSPC: search_end is not enforced */
+			ins->objectid = start_found ? last_block : search_start;
+			ins->offset = num_blocks;
+			goto insert;
+		}
+		key = &l->items[slot].key;
+		if (start_found) {
+			hole_size = key->objectid - last_block;
+			if (hole_size > num_blocks) {
+				ins->objectid = last_block;
+				ins->offset = num_blocks;
+				goto insert;
+			}
+		} else
+			start_found = 1;
+		last_block = key->objectid + key->offset;
+		path.slots[0]++;
+		printf("last block is not %lu\n", last_block);
+	}
+insert:
+	/* drop the search path before insert_item() runs its own search */
+	release_path(root, &path);
+	extent_item.refs = 1;
+	extent_item.owner = owner;
+	ret = insert_item(root, ins, &extent_item, sizeof(extent_item));
+	return ret;
+}
+
+/* Keep root supplied with blocks: a no-op while the alloc extent has more than
+ * min_blocks free or the reserve is unspent, else refill the reserve. */
+static int refill_alloc_extent(struct ctree_root *root)
+{
+	struct alloc_extent *ae = root->alloc_extent;
+	struct key key;
+	int ret;
+	int min_blocks = MAX_LEVEL * 2;
+
+	printf("refill alloc root %p, numused %lu total %lu\n", root, ae->num_used, ae->num_blocks);
+	if (ae->num_blocks > ae->num_used && ae->num_blocks - ae->num_used >
+	    min_blocks)
+		return 0;
+	ae = root->reserve_extent;
+	if (ae->num_blocks > ae->num_used) {
+		if (root->alloc_extent->num_blocks == 0)
+			BUG();	/* reserve/alloc should have been swapped */
+		if (ae->num_blocks - ae->num_used < min_blocks)
+			BUG();
+		return 0;
+	}
+	// FIXME, this recurses
+	ret = alloc_extent(root->extent_root,
+			   min_blocks * 2, 0, (u64)-1, 0, &key);
+	if (ret)
+		return ret;	/* nothing was inserted: leave the reserve alone */
+	ae->blocknr = key.objectid;
+	ae->num_blocks = key.offset;
+	ae->num_used = 0;
+	return ret;
+}
+
void print_leaf(struct leaf *l)
{
int i;
@@ -948,8 +1080,8 @@
/* for testing only */
int next_key(int i, int max_key) {
- return rand() % max_key;
- // return i;
+ // return rand() % max_key;
+ return i; /* hands back the caller's counter unchanged; max_key is unused on this path */
}
int main() {
@@ -960,7 +1092,7 @@
int i;
int num;
int ret;
- int run_size = 25000;
+ int run_size = 256;
int max_key = 100000000;
int tree_size = 0;
struct ctree_path path;
@@ -980,10 +1112,20 @@
ins.objectid = num;
ins.offset = 0;
ins.flags = 0;
+ printf("insert %d\n", i);
ret = insert_item(root, &ins, buf, strlen(buf));
if (!ret)
tree_size++;
+ printf("done insert %d\n", i);
}
+ printf("root used: %lu\n", root->alloc_extent->num_used);
+ printf("root tree\n");
+ print_tree(root, root->node);
+ printf("map tree\n");
+ printf("map used: %lu\n", root->extent_root->alloc_extent->num_used);
+ print_tree(root->extent_root, root->extent_root->node);
+ exit(1);
+
close_ctree(root);
root = open_ctree("dbfile");
printf("starting search\n");
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 586bf18..b737925 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1,7 +1,7 @@
#ifndef __CTREE__
#define __CTREE__
-#define CTREE_BLOCKSIZE 4096
+#define CTREE_BLOCKSIZE 256
struct key {
u64 objectid;
@@ -22,18 +22,41 @@
#define NODEPTRS_PER_BLOCK ((CTREE_BLOCKSIZE - sizeof(struct header)) / \
(sizeof(struct key) + sizeof(u64)))
-#define LEVEL_BITS 3
-#define MAX_LEVEL (1 << LEVEL_BITS)
+#define MAX_LEVEL 8 /* must stay a power of two: node_level() masks with MAX_LEVEL-1 */
#define node_level(f) ((f) & (MAX_LEVEL-1))
#define is_leaf(f) (node_level(f) == 0)
struct tree_buffer;
+
+struct alloc_extent { /* a run of blocks that get_free_block() hands out one at a time */
+ u64 blocknr; /* first block number of the run */
+ u64 num_blocks; /* total blocks in the run; set to 0 once the run is spent */
+ u64 num_used; /* blocks already handed out, counted from blocknr */
+} __attribute__ ((__packed__));
+
struct ctree_root {
struct tree_buffer *node;
+ struct ctree_root *extent_root; /* tree refill_alloc_extent() allocates new extents from; the extent root points at itself */
+ struct alloc_extent *alloc_extent; /* extent get_free_block() currently takes blocks from (ai1 or ai2) */
+ struct alloc_extent *reserve_extent; /* spare extent, swapped with alloc_extent when that one runs out */
int fp;
struct radix_tree_root cache_radix;
+ struct alloc_extent ai1; /* storage behind alloc_extent/reserve_extent, filled by __setup_root() */
+ struct alloc_extent ai2; /* second slot of the same pair */
};
+struct ctree_root_info { /* per-root record; two of these make up struct ctree_super_block */
+ u64 fsid[2]; /* FS specific uuid */
+ u64 blocknr; /* blocknr of this block */
+ u64 objectid; /* inode number of this root */
+ u64 tree_root; /* block number of the tree's root node, read by __setup_root() */
+ u32 csum; /* NOTE(review): nothing in this patch computes or checks it — confirm intended coverage */
+ u32 ham; /* NOTE(review): purpose not evident from this patch — confirm */
+ struct alloc_extent alloc_extent; /* copied into ctree_root.ai1 by __setup_root() */
+ struct alloc_extent reserve_extent; /* copied into ctree_root.ai2 by __setup_root() */
+ u64 snapuuid[2]; /* root specific uuid */
+} __attribute__ ((__packed__));
+
struct item {
struct key key;
u16 offset;
@@ -55,6 +78,11 @@
u64 blockptrs[NODEPTRS_PER_BLOCK];
} __attribute__ ((__packed__));
+struct extent_item { /* item body alloc_extent() inserts under key (objectid = first block, offset = block count) */
+ u32 refs; /* alloc_extent() sets this to 1 */
+ u64 owner; /* owner value passed in by alloc_extent()'s caller */
+} __attribute__ ((__packed__));
struct ctree_path {
struct tree_buffer *nodes[MAX_LEVEL];
int slots[MAX_LEVEL];
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8d51a07..653f18a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -12,25 +12,27 @@
static int allocated_blocks = 0;
-struct ctree_header {
- u64 root_block;
-} __attribute__ ((__packed__));
-
static int get_free_block(struct ctree_root *root, u64 *block)
{
struct stat st;
int ret;
+ if (root->alloc_extent->num_used >= root->alloc_extent->num_blocks) /* current extent is fully handed out */
+ return -1;
+
+ *block = root->alloc_extent->blocknr + root->alloc_extent->num_used; /* next unused block of the extent */
+ root->alloc_extent->num_used += 1;
+ if (root->alloc_extent->num_used >= root->alloc_extent->num_blocks) { /* that was the last block: switch to the reserve */
+ struct alloc_extent *ae = root->alloc_extent;
+ root->alloc_extent = root->reserve_extent;
+ root->reserve_extent = ae;
+ ae->num_blocks = 0; /* spent extent becomes the empty reserve that refill_alloc_extent() fills */
+ }
st.st_size = 0;
ret = fstat(root->fp, &st);
- if (st.st_size > sizeof(struct ctree_header)) {
- *block = (st.st_size -
- sizeof(struct ctree_header)) / CTREE_BLOCKSIZE;
- } else {
- *block = 0;
- }
- ret = ftruncate(root->fp, sizeof(struct ctree_header) + (*block + 1) *
- CTREE_BLOCKSIZE);
+ if (st.st_size < (*block + 1) * CTREE_BLOCKSIZE) /* grow the file only if it does not cover the block yet */
+ ret = ftruncate(root->fp, /* NOTE(review): when skipped, the value returned below is fstat()'s result */
+ (*block + 1) * CTREE_BLOCKSIZE);
return ret;
}
@@ -72,7 +74,7 @@
struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr)
{
- loff_t offset = blocknr * CTREE_BLOCKSIZE + sizeof(struct ctree_header);
+ loff_t offset = blocknr * CTREE_BLOCKSIZE;
struct tree_buffer *buf;
int ret;
@@ -101,7 +103,7 @@
int write_tree_block(struct ctree_root *root, struct tree_buffer *buf)
{
u64 blocknr = buf->blocknr;
- loff_t offset = blocknr * CTREE_BLOCKSIZE + sizeof(struct ctree_header);
+ loff_t offset = blocknr * CTREE_BLOCKSIZE;
int ret;
if (buf->blocknr != buf->node.header.blocknr)
@@ -114,11 +116,32 @@
return 0;
}
+struct ctree_super_block { /* open_ctree() reads this at CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE) */
+ struct ctree_root_info root_info; /* sets up the main root */
+ struct ctree_root_info extent_info; /* sets up the extent root */
+} __attribute__ ((__packed__));
+
+static int __setup_root(struct ctree_root *root, struct ctree_root *extent_root,
+			struct ctree_root_info *info, int fp)
+{	/* load root from its super block record info; always returns 0 */
+	root->fp = fp;
+	INIT_RADIX_TREE(&root->cache_radix, GFP_KERNEL); /* before any read */
+	root->node = read_tree_block(root, info->tree_root);
+	root->extent_root = extent_root;
+	memcpy(&root->ai1, &info->alloc_extent, sizeof(info->alloc_extent));
+	memcpy(&root->ai2, &info->reserve_extent, sizeof(info->reserve_extent));
+	root->alloc_extent = &root->ai1;
+	root->reserve_extent = &root->ai2;
+	printf("setup done reading root %p, used %lu\n", root, root->alloc_extent->num_used);
+	return 0;
+}
+
struct ctree_root *open_ctree(char *filename)
{
struct ctree_root *root = malloc(sizeof(struct ctree_root));
+ struct ctree_root *extent_root = malloc(sizeof(struct ctree_root));
+ struct ctree_super_block super;
int fp;
- u64 root_block;
int ret;
fp = open(filename, O_CREAT | O_RDWR);
@@ -126,14 +149,20 @@
free(root);
return NULL;
}
- root->fp = fp;
- INIT_RADIX_TREE(&root->cache_radix, GFP_KERNEL);
- ret = pread(fp, &root_block, sizeof(u64), 0);
- if (ret == sizeof(u64)) {
- printf("reading root node at block %lu\n", root_block);
- root->node = read_tree_block(root, root_block);
- } else
- root->node = NULL;
+ ret = pread(fp, &super, sizeof(struct ctree_super_block),
+ CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE));
+ if (ret == 0) {
+ ret = mkfs(fp);
+ if (ret)
+ return NULL;
+ ret = pread(fp, &super, sizeof(struct ctree_super_block),
+ CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE));
+ if (ret != sizeof(struct ctree_super_block))
+ return NULL;
+ }
+ BUG_ON(ret < 0);
+ __setup_root(root, extent_root, &super.root_info, fp);
+ __setup_root(extent_root, extent_root, &super.extent_info, fp);
return root;
}
@@ -160,6 +189,7 @@
void tree_block_release(struct ctree_root *root, struct tree_buffer *buf)
{
+ return;
buf->count--;
if (buf->count == 0) {
if (!radix_tree_lookup(&root->cache_radix, buf->blocknr))
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index ee95fa0..41acb65 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -17,5 +17,8 @@
void tree_block_release(struct ctree_root *root, struct tree_buffer *buf);
struct tree_buffer *alloc_free_block(struct ctree_root *root);
int update_root_block(struct ctree_root *root);
+int mkfs(int fd); /* open_ctree() calls this when the super block read returns 0 bytes; nonzero means failure */
+
+#define CTREE_SUPER_INFO_OFFSET(bs) (16 * (bs)) /* byte offset of the super block: 16 blocks of size bs into the file */
#endif