md/raid1: delay reads that could overtake behind-writes.
When a raid1 array is configured to support write-behind
on some devices, it normally only reads from other devices.
If all devices are write-behind (because the rest have failed)
it is possible for a read request to be serviced before a
behind-write request, which would appear as data corruption.
So when forced to read from a WriteMostly device, wait for any
write-behind to complete, and don't start any more behind-writes.
Signed-off-by: NeilBrown <neilb@suse.de>
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 2e08e48..cb2da87 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -857,6 +857,15 @@
}
mirror = conf->mirrors + rdisk;
+ if (test_bit(WriteMostly, &mirror->rdev->flags) &&
+ bitmap) {
+ /* Reading from a write-mostly device must
+ * take care not to over-take any writes
+ * that are 'behind'
+ */
+ wait_event(bitmap->behind_wait,
+ atomic_read(&bitmap->behind_writes) == 0);
+ }
r1_bio->read_disk = rdisk;
read_bio = bio_clone(bio, GFP_NOIO);
@@ -934,10 +943,14 @@
set_bit(R1BIO_Degraded, &r1_bio->state);
}
- /* do behind I/O ? */
+ /* do behind I/O ?
+ * Not if there are too many, or cannot allocate memory,
+ * or a reader on WriteMostly is waiting for behind writes
+ * to flush */
if (bitmap &&
(atomic_read(&bitmap->behind_writes)
< mddev->bitmap_info.max_write_behind) &&
+ !waitqueue_active(&bitmap->behind_wait) &&
(behind_pages = alloc_behind_pages(bio)) != NULL)
set_bit(R1BIO_BehindIO, &r1_bio->state);
@@ -2144,15 +2157,13 @@
{
conf_t *conf = mddev->private;
struct bitmap *bitmap = mddev->bitmap;
- int behind_wait = 0;
/* wait for behind writes to complete */
- while (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
- behind_wait++;
- printk(KERN_INFO "raid1: behind writes in progress on device %s, waiting to stop (%d)\n", mdname(mddev), behind_wait);
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(HZ); /* wait a second */
+ if (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
+ printk(KERN_INFO "raid1: behind writes in progress on device %s, waiting to stop.\n", mdname(mddev));
/* need to kick something here to make sure I/O goes? */
+ wait_event(bitmap->behind_wait,
+ atomic_read(&bitmap->behind_writes) == 0);
}
raise_barrier(conf);