tree c84a6b8d39e2e85b521f2b558575c48a67f33620
parent 074f317ef49ee7941db87abfb1cfc74ba2bcc6e5
author Lars Marowsky-Bree <[EMAIL PROTECTED]> Sun, 07 Aug 2005 01:26:00 -0300
committer Marcelo Tosatti <[EMAIL PROTECTED]> Sun, 07 Aug 2005 01:26:00 -0300

[PATCH] fix oops when starting md multipath 2.4 kernel

The device major/minor numbers no longer match the values recorded in the
descriptor array in the md superblock. Because of the exception made in
the current code, the descriptor entries are removed and although the
real devices are present and accounted for, they are kicked out from
the array. This leaves the array with zero devices. When multipath_run()
is invoked, it blows up expecting to have had some disks.

Lars Marowsky-Bree suggested some patches for md multipath in 2002, but
they never made it into the mainline 2.4 kernel:

http://marc.theaimsgroup.com/?l=linux-kernel&m=103355467608953&w=2
That patch is large and most of it is not required for this particular
problem.  The section that reinitializes the descriptor array from the
current rdevs in the multipath case resolves this issue of shifting
device names.

 drivers/md/md.c |  252 +++++++++++++++++++++++++++++---------------------------
 1 files changed, 134 insertions(+), 118 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1271,148 +1271,164 @@ static int analyze_sbs(mddev_t * mddev)
        memcpy (sb, freshest->sb, sizeof(*sb));
 
        /*
-        * at this point we have picked the 'best' superblock
-        * from all available superblocks.
-        * now we validate this superblock and kick out possibly
-        * failed disks.
+        * For multipathing, lots of things are different from "true"
+        * RAIDs.
+        * All rdev's could be read, so they are no longer faulty.
+        * As there is just one sb, trying to find changed devices via the
+        * this_disk pointer is useless too.
+        *
+        * [EMAIL PROTECTED], 2002-09-12
         */
-       ITERATE_RDEV(mddev,rdev,tmp) {
-               /*
-                * Kick all non-fresh devices
-                */
-               __u64 ev1, ev2;
-               ev1 = md_event(rdev->sb);
-               ev2 = md_event(sb);
-               ++ev1;
-               if (ev1 < ev2) {
-                       printk(KERN_WARNING "md: kicking non-fresh %s from 
array!\n",
-                                               partition_name(rdev->dev));
-                       kick_rdev_from_array(rdev);
-                       continue;
-               }
-       }
 
-       /*
-        * Fix up changed device names ... but only if this disk has a
-        * recent update time. Use faulty checksum ones too.
-        */
-       if (mddev->sb->level != -4)
-       ITERATE_RDEV(mddev,rdev,tmp) {
-               __u64 ev1, ev2, ev3;
-               if (rdev->faulty || rdev->alias_device) {
-                       MD_BUG();
-                       goto abort;
-               }
-               ev1 = md_event(rdev->sb);
-               ev2 = md_event(sb);
-               ev3 = ev2;
-               --ev3;
-               if ((rdev->dev != rdev->old_dev) &&
-                       ((ev1 == ev2) || (ev1 == ev3))) {
+       if (sb->level == -4) {
+               int desc_nr = 0;
+
+               /* ... and initialize from the current rdevs instead */
+               ITERATE_RDEV(mddev,rdev,tmp) {
                        mdp_disk_t *desc;
 
-                       printk(KERN_WARNING "md: device name has changed from 
%s to %s since last import!\n",
-                              partition_name(rdev->old_dev), 
partition_name(rdev->dev));
-                       if (rdev->desc_nr == -1) {
-                               MD_BUG();
-                               goto abort;
-                       }
+                       rdev->desc_nr=desc_nr;
+
                        desc = &sb->disks[rdev->desc_nr];
-                       if (rdev->old_dev != MKDEV(desc->major, desc->minor)) {
-                               MD_BUG();
-                               goto abort;
-                       }
-                       desc->major = MAJOR(rdev->dev);
-                       desc->minor = MINOR(rdev->dev);
-                       desc = &rdev->sb->this_disk;
+
+                       desc->number = desc_nr;
                        desc->major = MAJOR(rdev->dev);
                        desc->minor = MINOR(rdev->dev);
-               }
-       }
+                       desc->raid_disk = desc_nr;
 
-       /*
-        * Remove unavailable and faulty devices ...
-        *
-        * note that if an array becomes completely unrunnable due to
-        * missing devices, we do not write the superblock back, so the
-        * administrator has a chance to fix things up. The removal thus
-        * only happens if it's nonfatal to the contents of the array.
-        */
-       for (i = 0; i < MD_SB_DISKS; i++) {
-               int found;
-               mdp_disk_t *desc;
-               kdev_t dev;
+                       /* We could read from it, so it isn't faulty
+                        * any longer */
+                       if (disk_faulty(desc))
+                               mark_disk_spare(desc);
 
-               desc = sb->disks + i;
-               dev = MKDEV(desc->major, desc->minor);
+                       memcpy(&rdev->sb->this_disk,desc,sizeof(*desc));
+
+                       desc_nr++;
+               }
 
+               /* Kick out all old info about disks we used to have,
+                * if any */
+               for (i = desc_nr; i < MD_SB_DISKS; i++)
+                       memset(&(sb->disks[i]),0,sizeof(mdp_disk_t));
+       } else {
                /*
-                * We kick faulty devices/descriptors immediately.
-                *
-                * Note: multipath devices are a special case.  Since we
-                * were able to read the superblock on the path, we don't
-                * care if it was previously marked as faulty, it's up now
-                * so enable it.
+                * at this point we have picked the 'best' superblock
+                * from all available superblocks.
+                * now we validate this superblock and kick out possibly
+                * failed disks.
                 */
-               if (disk_faulty(desc) && mddev->sb->level != -4) {
-                       found = 0;
-                       ITERATE_RDEV(mddev,rdev,tmp) {
-                               if (rdev->desc_nr != desc->number)
-                                       continue;
-                               printk(KERN_WARNING "md%d: kicking faulty 
%s!\n",
-                                       mdidx(mddev),partition_name(rdev->dev));
-                               kick_rdev_from_array(rdev);
-                               found = 1;
-                               break;
-                       }
-                       if (!found) {
-                               if (dev == MKDEV(0,0))
-                                       continue;
-                               printk(KERN_WARNING "md%d: removing former 
faulty %s!\n",
-                                       mdidx(mddev), partition_name(dev));
-                       }
-                       remove_descriptor(desc, sb);
-                       continue;
-               } else if (disk_faulty(desc)) {
+               ITERATE_RDEV(mddev,rdev,tmp) {
                        /*
-                        * multipath entry marked as faulty, unfaulty it
+                        * Kick all non-fresh devices
                         */
-                       rdev = find_rdev(mddev, dev);
-                       if(rdev)
-                               mark_disk_spare(desc);
-                       else
-                               remove_descriptor(desc, sb);
+                       __u64 ev1, ev2;
+                       ev1 = md_event(rdev->sb);
+                       ev2 = md_event(sb);
+                       ++ev1;
+                       if (ev1 < ev2) {
+                               printk(KERN_WARNING "md: kicking non-fresh %s 
from array!\n",
+                                                       
partition_name(rdev->dev));
+                               kick_rdev_from_array(rdev);
+                               continue;
+                       }
                }
 
-               if (dev == MKDEV(0,0))
-                       continue;
                /*
-                * Is this device present in the rdev ring?
+                * Fix up changed device names ... but only if this disk has a
+                * recent update time. Use faulty checksum ones too.
                 */
-               found = 0;
                ITERATE_RDEV(mddev,rdev,tmp) {
+                       __u64 ev1, ev2, ev3;
+                       if (rdev->faulty || rdev->alias_device) {
+                               MD_BUG();
+                               goto abort;
+                       }
+                       ev1 = md_event(rdev->sb);
+                       ev2 = md_event(sb);
+                       ev3 = ev2;
+                       --ev3;
+                       if ((rdev->dev != rdev->old_dev) &&
+                               ((ev1 == ev2) || (ev1 == ev3))) {
+                               mdp_disk_t *desc;
+
+                               printk(KERN_WARNING "md: device name has 
changed from %s to %s since last import!\n",
+                                      partition_name(rdev->old_dev), 
partition_name(rdev->dev));
+                               if (rdev->desc_nr == -1) {
+                                       MD_BUG();
+                                       goto abort;
+                               }
+                               desc = &sb->disks[rdev->desc_nr];
+                               if (rdev->old_dev != MKDEV(desc->major, 
desc->minor)) {
+                                       MD_BUG();
+                                       goto abort;
+                               }
+                               desc->major = MAJOR(rdev->dev);
+                               desc->minor = MINOR(rdev->dev);
+                               desc = &rdev->sb->this_disk;
+                               desc->major = MAJOR(rdev->dev);
+                               desc->minor = MINOR(rdev->dev);
+                       }
+               }
+
+               /*
+                * Remove unavailable and faulty devices ...
+                *
+                * note that if an array becomes completely unrunnable due to
+                * missing devices, we do not write the superblock back, so the
+                * administrator has a chance to fix things up. The removal thus
+                * only happens if it's nonfatal to the contents of the array.
+                */
+               for (i = 0; i < MD_SB_DISKS; i++) {
+                       int found;
+                       mdp_disk_t *desc;
+                       kdev_t dev;
+
+                       desc = sb->disks + i;
+                       dev = MKDEV(desc->major, desc->minor);
+
                        /*
-                        * Multi-path IO special-case: since we have no
-                        * this_disk descriptor at auto-detect time,
-                        * we cannot check rdev->number.
-                        * We can check the device though.
+                        * We kick faulty devices/descriptors immediately.
                         */
-                       if ((sb->level == -4) && (rdev->dev ==
-                                       MKDEV(desc->major,desc->minor))) {
-                               found = 1;
-                               break;
+                       if (disk_faulty(desc)) {
+                               found = 0;
+                               ITERATE_RDEV(mddev,rdev,tmp) {
+                                       if (rdev->desc_nr != desc->number)
+                                               continue;
+                                       printk(KERN_WARNING "md%d: kicking 
faulty %s!\n",
+                                               
mdidx(mddev),partition_name(rdev->dev));
+                                       kick_rdev_from_array(rdev);
+                                       found = 1;
+                                       break;
+                               }
+                               if (!found) {
+                                       if (dev == MKDEV(0,0))
+                                               continue;
+                                       printk(KERN_WARNING "md%d: removing 
former faulty %s!\n",
+                                               mdidx(mddev), 
partition_name(dev));
+                               }
+                               remove_descriptor(desc, sb);
+                               continue;
                        }
-                       if (rdev->desc_nr == desc->number) {
-                               found = 1;
-                               break;
+
+                       if (dev == MKDEV(0,0))
+                               continue;
+                       /*
+                        * Is this device present in the rdev ring?
+                        */
+                       found = 0;
+                       ITERATE_RDEV(mddev,rdev,tmp) {
+                               if (rdev->desc_nr == desc->number) {
+                                       found = 1;
+                                       break;
+                               }
                        }
-               }
-               if (found)
-                       continue;
+                       if (found)
+                               continue;
 
-               printk(KERN_WARNING "md%d: former device %s is unavailable, 
removing from array!\n",
-                      mdidx(mddev), partition_name(dev));
-               remove_descriptor(desc, sb);
+                       printk(KERN_WARNING "md%d: former device %s is 
unavailable, removing from array!\n",
+                              mdidx(mddev), partition_name(dev));
+                       remove_descriptor(desc, sb);
+               }
        }
 
        /*
-
To unsubscribe from this list: send the line "unsubscribe git-commits-24" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to