[PATCH] md: Improve locking around error handling
The error handling routines don't use proper locking, and so two concurrent
errors could trigger a problem.
So:
- use test-and-set and test-and-clear to synchonise
the In_sync bits with the ->degraded count
- use the spinlock to protect updates to the
degraded count (could use an atomic_t but that
would be a bigger change in code, and isn't
really justified)
- remove un-necessary locking in raid5
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 3fc9ec2..99c4e03 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -959,14 +959,16 @@
* normal single drive
*/
return;
- if (test_bit(In_sync, &rdev->flags)) {
+ if (test_and_clear_bit(In_sync, &rdev->flags)) {
+ unsigned long flags;
+ spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded++;
+ spin_unlock_irqrestore(&conf->device_lock, flags);
/*
* if recovery is running, make sure it aborts.
*/
set_bit(MD_RECOVERY_ERR, &mddev->recovery);
}
- clear_bit(In_sync, &rdev->flags);
set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags);
printk(KERN_ALERT "raid1: Disk failure on %s, disabling device. \n"
@@ -1022,9 +1024,11 @@
mdk_rdev_t *rdev = conf->mirrors[i].rdev;
if (rdev
&& !test_bit(Faulty, &rdev->flags)
- && !test_bit(In_sync, &rdev->flags)) {
+ && !test_and_set_bit(In_sync, &rdev->flags)) {
+ unsigned long flags;
+ spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded--;
- set_bit(In_sync, &rdev->flags);
+ spin_unlock_irqrestore(&conf->device_lock, flags);
}
}
@@ -2048,7 +2052,7 @@
mirror_info_t *newmirrors;
conf_t *conf = mddev_to_conf(mddev);
int cnt, raid_disks;
-
+ unsigned long flags;
int d, d2;
/* Cannot change chunk_size, layout, or level */
@@ -2107,7 +2111,9 @@
kfree(conf->poolinfo);
conf->poolinfo = newpoolinfo;
+ spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded += (raid_disks - conf->raid_disks);
+ spin_unlock_irqrestore(&conf->device_lock, flags);
conf->raid_disks = mddev->raid_disks = raid_disks;
mddev->delta_disks = 0;