md-cluster: Use a small window for resync
Suspending the entire device for resync could take too long. Resync
in small chunks.
cluster's resync window (32M) is maintained in r1conf as
cluster_sync_low and cluster_sync_high and processed in
raid1's sync_request(). If the current resync is outside the cluster
resync window:
1. Set the cluster_sync_low to curr_resync_completed.
2. Check if the sync will fit in the new window, if not issue a
wait_barrier() and set cluster_sync_low to sector_nr.
3. Set cluster_sync_high to cluster_sync_low + resync_window.
4. Send a message to all nodes so they may add it in their suspension
list.
bitmap_cond_end_sync is modified to allow to force a sync inorder
to get the curr_resync_completed uptodate with the sector passed.
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: NeilBrown <neilb@suse.de>
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index e9d3ee7..4f22e91 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -1570,7 +1570,7 @@
}
EXPORT_SYMBOL(bitmap_close_sync);
-void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
+void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force)
{
sector_t s = 0;
sector_t blocks;
@@ -1581,7 +1581,7 @@
bitmap->last_end_sync = jiffies;
return;
}
- if (time_before(jiffies, (bitmap->last_end_sync
+ if (!force && time_before(jiffies, (bitmap->last_end_sync
+ bitmap->mddev->bitmap_info.daemon_sleep)))
return;
wait_event(bitmap->mddev->recovery_wait,
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h
index 8731fa0..7d5c3a6 100644
--- a/drivers/md/bitmap.h
+++ b/drivers/md/bitmap.h
@@ -257,7 +257,7 @@
int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int degraded);
void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted);
void bitmap_close_sync(struct bitmap *bitmap);
-void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector);
+void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force);
void bitmap_unplug(struct bitmap *bitmap);
void bitmap_daemon_work(struct mddev *mddev);
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 4a965f2..b94a2e6 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -802,15 +802,6 @@
return cinfo->slot_number - 1;
}
-static void resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
-{
- struct md_cluster_info *cinfo = mddev->cluster_info;
-
- add_resync_info(mddev, cinfo->bitmap_lockres, lo, hi);
- /* Re-acquire the lock to refresh LVB */
- dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW);
-}
-
static int metadata_update_start(struct mddev *mddev)
{
return lock_comm(mddev->cluster_info);
@@ -836,45 +827,25 @@
return dlm_unlock_sync(cinfo->token_lockres);
}
-static int resync_send(struct mddev *mddev, enum msg_type type,
- sector_t lo, sector_t hi)
+static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
{
struct md_cluster_info *cinfo = mddev->cluster_info;
struct cluster_msg cmsg;
int slot = cinfo->slot_number - 1;
+ add_resync_info(mddev, cinfo->bitmap_lockres, lo, hi);
+ /* Re-acquire the lock to refresh LVB */
+ dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW);
pr_info("%s:%d lo: %llu hi: %llu\n", __func__, __LINE__,
(unsigned long long)lo,
(unsigned long long)hi);
- resync_info_update(mddev, lo, hi);
- cmsg.type = cpu_to_le32(type);
+ cmsg.type = cpu_to_le32(RESYNCING);
cmsg.slot = cpu_to_le32(slot);
cmsg.low = cpu_to_le64(lo);
cmsg.high = cpu_to_le64(hi);
return sendmsg(cinfo, &cmsg);
}
-static int resync_start(struct mddev *mddev, sector_t lo, sector_t hi)
-{
- pr_info("%s:%d\n", __func__, __LINE__);
- return resync_send(mddev, RESYNCING, lo, hi);
-}
-
-static void resync_finish(struct mddev *mddev)
-{
- struct md_cluster_info *cinfo = mddev->cluster_info;
- struct cluster_msg cmsg;
- int slot = cinfo->slot_number - 1;
-
- pr_info("%s:%d\n", __func__, __LINE__);
- resync_send(mddev, RESYNCING, 0, 0);
- if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
- cmsg.type = cpu_to_le32(BITMAP_NEEDS_SYNC);
- cmsg.slot = cpu_to_le32(slot);
- sendmsg(cinfo, &cmsg);
- }
-}
-
static int area_resyncing(struct mddev *mddev, int direction,
sector_t lo, sector_t hi)
{
@@ -997,8 +968,6 @@
.leave = leave,
.slot_number = slot_number,
.resync_info_update = resync_info_update,
- .resync_start = resync_start,
- .resync_finish = resync_finish,
.metadata_update_start = metadata_update_start,
.metadata_update_finish = metadata_update_finish,
.metadata_update_cancel = metadata_update_cancel,
diff --git a/drivers/md/md-cluster.h b/drivers/md/md-cluster.h
index 00defe2..f5bdc0c 100644
--- a/drivers/md/md-cluster.h
+++ b/drivers/md/md-cluster.h
@@ -12,9 +12,7 @@
int (*join)(struct mddev *mddev, int nodes);
int (*leave)(struct mddev *mddev);
int (*slot_number)(struct mddev *mddev);
- void (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi);
- int (*resync_start)(struct mddev *mddev, sector_t lo, sector_t hi);
- void (*resync_finish)(struct mddev *mddev);
+ int (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi);
int (*metadata_update_start)(struct mddev *mddev);
int (*metadata_update_finish)(struct mddev *mddev);
int (*metadata_update_cancel)(struct mddev *mddev);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 1e1bdd8..9798a99 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -7805,9 +7805,6 @@
md_new_event(mddev);
update_time = jiffies;
- if (mddev_is_clustered(mddev))
- md_cluster_ops->resync_start(mddev, j, max_sectors);
-
blk_start_plug(&plug);
while (j < max_sectors) {
sector_t sectors;
@@ -7871,8 +7868,6 @@
j = max_sectors;
if (j > 2)
mddev->curr_resync = j;
- if (mddev_is_clustered(mddev))
- md_cluster_ops->resync_info_update(mddev, j, max_sectors);
mddev->curr_mark_cnt = io_sectors;
if (last_check == 0)
/* this is the earliest that rebuild will be
@@ -7979,9 +7974,6 @@
}
}
skip:
- if (mddev_is_clustered(mddev))
- md_cluster_ops->resync_finish(mddev);
-
set_bit(MD_CHANGE_DEVS, &mddev->flags);
spin_lock(&mddev->lock);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 049df6c..1dd13bb 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -90,6 +90,8 @@
#define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)
#define RESYNC_WINDOW (RESYNC_BLOCK_SIZE * RESYNC_DEPTH)
#define RESYNC_WINDOW_SECTORS (RESYNC_WINDOW >> 9)
+#define CLUSTER_RESYNC_WINDOW (16 * RESYNC_WINDOW)
+#define CLUSTER_RESYNC_WINDOW_SECTORS (CLUSTER_RESYNC_WINDOW >> 9)
#define NEXT_NORMALIO_DISTANCE (3 * RESYNC_WINDOW_SECTORS)
static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
@@ -2488,6 +2490,13 @@
bitmap_close_sync(mddev->bitmap);
close_sync(conf);
+
+ if (mddev_is_clustered(mddev)) {
+ conf->cluster_sync_low = 0;
+ conf->cluster_sync_high = 0;
+ /* Send zeros to mark end of resync */
+ md_cluster_ops->resync_info_update(mddev, 0, 0);
+ }
return 0;
}
@@ -2508,7 +2517,12 @@
return sync_blocks;
}
- bitmap_cond_end_sync(mddev->bitmap, sector_nr);
+ /* we are incrementing sector_nr below. To be safe, we check against
+ * sector_nr + two times RESYNC_SECTORS
+ */
+
+ bitmap_cond_end_sync(mddev->bitmap, sector_nr,
+ mddev_is_clustered(mddev) && (sector_nr + 2 * RESYNC_SECTORS > conf->cluster_sync_high));
r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
raise_barrier(conf, sector_nr);
@@ -2699,6 +2713,16 @@
bio_full:
r1_bio->sectors = nr_sectors;
+ if (mddev_is_clustered(mddev) &&
+ conf->cluster_sync_high < sector_nr + nr_sectors) {
+ conf->cluster_sync_low = mddev->curr_resync_completed;
+ conf->cluster_sync_high = conf->cluster_sync_low + CLUSTER_RESYNC_WINDOW_SECTORS;
+ /* Send resync message */
+ md_cluster_ops->resync_info_update(mddev,
+ conf->cluster_sync_low,
+ conf->cluster_sync_high);
+ }
+
/* For a user-requested sync, we read all readable devices and do a
* compare
*/
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index c52d713..61c39b3 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -111,6 +111,13 @@
* the new thread here until we fully activate the array.
*/
struct md_thread *thread;
+
+ /* Keep track of cluster resync window to send to other
+ * nodes.
+ */
+ sector_t cluster_sync_low;
+ sector_t cluster_sync_high;
+
};
/*
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 7c99a40..5f30b75 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -3137,7 +3137,7 @@
/* resync. Schedule a read for every block at this virt offset */
int count = 0;
- bitmap_cond_end_sync(mddev->bitmap, sector_nr);
+ bitmap_cond_end_sync(mddev->bitmap, sector_nr, 0);
if (!bitmap_start_sync(mddev->bitmap, sector_nr,
&sync_blocks, mddev->degraded) &&
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 49bb8d3..5b79770 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5613,7 +5613,7 @@
return sync_blocks * STRIPE_SECTORS; /* keep things rounded to whole stripes */
}
- bitmap_cond_end_sync(mddev->bitmap, sector_nr);
+ bitmap_cond_end_sync(mddev->bitmap, sector_nr, false);
sh = get_active_stripe(conf, sector_nr, 0, 1, 0);
if (sh == NULL) {