Btrfs: fix wrong disk size when writing super blocks
total_size will be changed when resizing a device, and disk_total_size
will be changed if resizing is successful. Meanwhile, the on-disk super
blocks of the previous transaction might not be updated. Considering
the consistency of the metadata in the previous transaction, We should
use the size in the previous transaction to check if the super block is
beyond the boundary of the device. Fix it.
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Chris Mason <clm@fb.com>
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index e0033c8..cb7f3fe 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -807,7 +807,7 @@
/* super block bytenr is always the unmapped device bytenr */
dev_bytenr = btrfs_sb_offset(superblock_mirror_num);
- if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes)
+ if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->commit_total_bytes)
return -1;
bh = __bread(superblock_bdev, dev_bytenr / 4096,
BTRFS_SUPER_INFO_SIZE);
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 72dc02e..7877b0f 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -168,6 +168,8 @@
dev_replace->srcdev->total_bytes;
dev_replace->tgtdev->disk_total_bytes =
dev_replace->srcdev->disk_total_bytes;
+ dev_replace->tgtdev->commit_total_bytes =
+ dev_replace->srcdev->commit_total_bytes;
dev_replace->tgtdev->bytes_used =
dev_replace->srcdev->bytes_used;
}
@@ -329,6 +331,20 @@
args->start.tgtdev_name[0] == '\0')
return -EINVAL;
+ /*
+ * Here we commit the transaction to make sure commit_total_bytes
+ * of all the devices are updated.
+ */
+ trans = btrfs_attach_transaction(root);
+ if (!IS_ERR(trans)) {
+ ret = btrfs_commit_transaction(trans, root);
+ if (ret)
+ return ret;
+ } else if (PTR_ERR(trans) != -ENOENT) {
+ return PTR_ERR(trans);
+ }
+
+ /* the disk copy procedure reuses the scrub code */
mutex_lock(&fs_info->volume_mutex);
ret = btrfs_dev_replace_find_srcdev(root, args->start.srcdevid,
args->start.srcdev_name,
@@ -539,6 +555,8 @@
memcpy(src_device->uuid, uuid_tmp, sizeof(src_device->uuid));
tgt_device->total_bytes = src_device->total_bytes;
tgt_device->disk_total_bytes = src_device->disk_total_bytes;
+ ASSERT(list_empty(&src_device->resized_list));
+ tgt_device->commit_total_bytes = src_device->commit_total_bytes;
tgt_device->bytes_used = src_device->bytes_used;
if (fs_info->sb->s_bdev == src_device->bdev)
fs_info->sb->s_bdev = tgt_device->bdev;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index dbd7927..0cd18b7 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3127,7 +3127,8 @@
for (i = 0; i < max_mirrors; i++) {
bytenr = btrfs_sb_offset(i);
- if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
+ if (bytenr + BTRFS_SUPER_INFO_SIZE >=
+ device->commit_total_bytes)
break;
if (wait) {
@@ -3444,7 +3445,7 @@
btrfs_set_stack_device_type(dev_item, dev->type);
btrfs_set_stack_device_id(dev_item, dev->devid);
btrfs_set_stack_device_total_bytes(dev_item,
- dev->disk_total_bytes);
+ dev->commit_total_bytes);
btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used);
btrfs_set_stack_device_io_align(dev_item, dev->io_align);
btrfs_set_stack_device_io_width(dev_item, dev->io_width);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 72c8981..9d80e37 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2840,7 +2840,8 @@
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
bytenr = btrfs_sb_offset(i);
- if (bytenr + BTRFS_SUPER_INFO_SIZE > scrub_dev->total_bytes)
+ if (bytenr + BTRFS_SUPER_INFO_SIZE >
+ scrub_dev->commit_total_bytes)
break;
ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index e336646..2f7c0be 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1868,6 +1868,8 @@
memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy,
sizeof(*root->fs_info->super_copy));
+ btrfs_update_commit_device_size(root->fs_info);
+
spin_lock(&root->fs_info->trans_lock);
cur_trans->state = TRANS_STATE_UNBLOCKED;
root->fs_info->running_transaction = NULL;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1646659..7b5c042 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -74,6 +74,7 @@
mutex_init(&fs_devs->device_list_mutex);
INIT_LIST_HEAD(&fs_devs->devices);
+ INIT_LIST_HEAD(&fs_devs->resized_devices);
INIT_LIST_HEAD(&fs_devs->alloc_list);
INIT_LIST_HEAD(&fs_devs->list);
@@ -154,6 +155,7 @@
INIT_LIST_HEAD(&dev->dev_list);
INIT_LIST_HEAD(&dev->dev_alloc_list);
+ INIT_LIST_HEAD(&dev->resized_list);
spin_lock_init(&dev->io_lock);
@@ -2168,6 +2170,7 @@
device->sector_size = root->sectorsize;
device->total_bytes = i_size_read(bdev->bd_inode);
device->disk_total_bytes = device->total_bytes;
+ device->commit_total_bytes = device->total_bytes;
device->dev_root = root->fs_info->dev_root;
device->bdev = bdev;
device->in_fs_metadata = 1;
@@ -2364,6 +2367,8 @@
device->sector_size = root->sectorsize;
device->total_bytes = srcdev->total_bytes;
device->disk_total_bytes = srcdev->disk_total_bytes;
+ ASSERT(list_empty(&srcdev->resized_list));
+ device->commit_total_bytes = srcdev->commit_total_bytes;
device->bytes_used = srcdev->bytes_used;
device->dev_root = fs_info->dev_root;
device->bdev = bdev;
@@ -2448,6 +2453,7 @@
{
struct btrfs_super_block *super_copy =
device->dev_root->fs_info->super_copy;
+ struct btrfs_fs_devices *fs_devices;
u64 old_total = btrfs_super_total_bytes(super_copy);
u64 diff = new_size - device->total_bytes;
@@ -2457,12 +2463,17 @@
device->is_tgtdev_for_dev_replace)
return -EINVAL;
+ fs_devices = device->dev_root->fs_info->fs_devices;
+
btrfs_set_super_total_bytes(super_copy, old_total + diff);
device->fs_devices->total_rw_bytes += diff;
device->total_bytes = new_size;
device->disk_total_bytes = new_size;
btrfs_clear_space_info_full(device->dev_root->fs_info);
+ if (list_empty(&device->resized_list))
+ list_add_tail(&device->resized_list,
+ &fs_devices->resized_devices);
return btrfs_update_device(trans, device);
}
@@ -4011,8 +4022,11 @@
}
lock_chunks(root);
-
device->disk_total_bytes = new_size;
+ if (list_empty(&device->resized_list))
+ list_add_tail(&device->resized_list,
+ &root->fs_info->fs_devices->resized_devices);
+
/* Now btrfs_update_device() will change the on-disk size. */
ret = btrfs_update_device(trans, device);
if (ret) {
@@ -5993,6 +6007,7 @@
device->devid = btrfs_device_id(leaf, dev_item);
device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item);
device->total_bytes = device->disk_total_bytes;
+ device->commit_total_bytes = device->disk_total_bytes;
device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
device->type = btrfs_device_type(leaf, dev_item);
device->io_align = btrfs_device_io_align(leaf, dev_item);
@@ -6520,3 +6535,26 @@
return 0;
}
+
+/*
+ * Update the size of all devices, which is used for writing out the
+ * super blocks.
+ */
+void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+ struct btrfs_device *curr, *next;
+
+ if (list_empty(&fs_devices->resized_devices))
+ return;
+
+ mutex_lock(&fs_devices->device_list_mutex);
+ lock_chunks(fs_info->dev_root);
+ list_for_each_entry_safe(curr, next, &fs_devices->resized_devices,
+ resized_list) {
+ list_del_init(&curr->resized_list);
+ curr->commit_total_bytes = curr->disk_total_bytes;
+ }
+ unlock_chunks(fs_info->dev_root);
+ mutex_unlock(&fs_devices->device_list_mutex);
+}
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index e15f288..b30d018 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -87,6 +87,21 @@
/* physical drive uuid (or lvm uuid) */
u8 uuid[BTRFS_UUID_SIZE];
+ /*
+ * size of the device on the current transaction
+ *
+ * This variant is update when committing the transaction,
+ * and protected by device_list_mutex
+ */
+ u64 commit_total_bytes;
+
+ /*
+ * used to manage the device which is resized
+ *
+ * It is protected by chunk_lock.
+ */
+ struct list_head resized_list;
+
/* for sending down flush barriers */
int nobarriers;
struct bio *flush_bio;
@@ -136,6 +151,7 @@
struct mutex device_list_mutex;
struct list_head devices;
+ struct list_head resized_devices;
/* devices not currently being allocated */
struct list_head alloc_list;
struct list_head list;
@@ -402,4 +418,6 @@
{
btrfs_dev_stat_set(dev, index, 0);
}
+
+void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info);
#endif