[PATCH] md: count corrected read errors per drive
Store this total in superblock (As appropriate), and make it available to
userspace via sysfs.
Signed-off-by: Neil Brown <neilb@suse.de>
Acked-by: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/Documentation/md.txt b/Documentation/md.txt
index fd43fd2..a3eadf8 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -222,6 +222,17 @@
of being recoverred to
This list make grow in future.
+ errors
+ An approximate count of read errors that have been detected on
+ this device but have not caused the device to be evicted from
+ the array (either because they were corrected or because they
+ happened while the array was read-only). When using version-1
+ metadata, this value persists across restarts of the array.
+
+ This value can be written while assembling an array thus
+ providing an ongoing count for arrays with metadata managed by
+ userspace.
+
An active md device will also contain and entry for each active device
in the array. These are named
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 594d8c3..32a4e23 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1000,6 +1000,7 @@
}
rdev->preferred_minor = 0xffff;
rdev->data_offset = le64_to_cpu(sb->data_offset);
+ atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
bmask = queue_hardsect_size(rdev->bdev->bd_disk->queue)-1;
@@ -1139,6 +1140,8 @@
else
sb->resync_offset = cpu_to_le64(0);
+ sb->cnt_corrected_read = atomic_read(&rdev->corrected_errors);
+
if (mddev->bitmap && mddev->bitmap_file == NULL) {
sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
@@ -1592,9 +1595,30 @@
}
static struct rdev_sysfs_entry rdev_super = __ATTR_RO(super);
+static ssize_t
+errors_show(mdk_rdev_t *rdev, char *page)
+{
+ return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
+}
+
+static ssize_t
+errors_store(mdk_rdev_t *rdev, const char *buf, size_t len)
+{
+ char *e;
+ unsigned long n = simple_strtoul(buf, &e, 10);
+ if (*buf && (*e == 0 || *e == '\n')) {
+ atomic_set(&rdev->corrected_errors, n);
+ return len;
+ }
+ return -EINVAL;
+}
+static struct rdev_sysfs_entry rdev_errors =
+__ATTR(errors, 0644, errors_show, errors_store);
+
static struct attribute *rdev_default_attrs[] = {
&rdev_state.attr,
&rdev_super.attr,
+ &rdev_errors.attr,
NULL,
};
static ssize_t
@@ -1674,6 +1698,7 @@
rdev->data_offset = 0;
atomic_set(&rdev->nr_pending, 0);
atomic_set(&rdev->read_errors, 0);
+ atomic_set(&rdev->corrected_errors, 0);
size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
if (!size) {
@@ -4729,7 +4754,7 @@
int num = simple_strtoul(val, &e, 10);
if (*val && (*e == '\0' || *e == '\n')) {
start_readonly = num;
- return 0;;
+ return 0;
}
return -EINVAL;
}
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 181c961..a06ff91 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1265,6 +1265,7 @@
if (r1_bio->bios[d]->bi_end_io != end_sync_read)
continue;
rdev = conf->mirrors[d].rdev;
+ atomic_add(s, &rdev->corrected_errors);
if (sync_page_io(rdev->bdev,
sect + rdev->data_offset,
s<<9,
@@ -1463,6 +1464,7 @@
d = conf->raid_disks;
d--;
rdev = conf->mirrors[d].rdev;
+ atomic_add(s, &rdev->corrected_errors);
if (rdev &&
test_bit(In_sync, &rdev->flags)) {
if (sync_page_io(rdev->bdev,
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 201dc71..9e658e5 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1122,9 +1122,13 @@
if (test_bit(BIO_UPTODATE, &bio->bi_flags))
set_bit(R10BIO_Uptodate, &r10_bio->state);
- else if (!test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery))
- md_error(r10_bio->mddev,
- conf->mirrors[d].rdev);
+ else {
+ atomic_add(r10_bio->sectors,
+ &conf->mirrors[d].rdev->corrected_errors);
+ if (!test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery))
+ md_error(r10_bio->mddev,
+ conf->mirrors[d].rdev);
+ }
/* for reconstruct, we always reschedule after a read.
* for resync, only after all reads
@@ -1430,6 +1434,7 @@
sl--;
d = r10_bio->devs[sl].devnum;
rdev = conf->mirrors[d].rdev;
+ atomic_add(s, &rdev->corrected_errors);
if (rdev &&
test_bit(In_sync, &rdev->flags)) {
if (sync_page_io(rdev->bdev,
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 9cc844f..54f4a98 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1400,6 +1400,9 @@
bi->bi_io_vec[0].bv_offset = 0;
bi->bi_size = STRIPE_SIZE;
bi->bi_next = NULL;
+ if (rw == WRITE &&
+ test_bit(R5_ReWrite, &sh->dev[i].flags))
+ atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
generic_make_request(bi);
} else {
if (rw == 1)
diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c
index 84dd875..8c823d6 100644
--- a/drivers/md/raid6main.c
+++ b/drivers/md/raid6main.c
@@ -1562,6 +1562,9 @@
bi->bi_io_vec[0].bv_offset = 0;
bi->bi_size = STRIPE_SIZE;
bi->bi_next = NULL;
+ if (rw == WRITE &&
+ test_bit(R5_ReWrite, &sh->dev[i].flags))
+ atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
generic_make_request(bi);
} else {
if (rw == 1)
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 6864631..68b929c 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -95,6 +95,10 @@
atomic_t read_errors; /* number of consecutive read errors that
* we have tried to ignore.
*/
+ atomic_t corrected_errors; /* number of corrected read errors,
+ * for reporting to userspace and storing
+ * in superblock.
+ */
};
struct mddev_s