Hi all,

Attached is a fix for a problem that happens when /proc/mdstat is read
when a raid device is being stopped. A panic could result.

Not many users read /proc/mdstat often or stop a raid device
manually, but this problem caused us many headaches.

The problem happens something like...
1) raidstop is run
2) the raidstop process removes the superblock structure from the raid
device structure before the device is removed from the list of raid devices.
3) /proc/mdstat starts reading raid device structures and
   tries to read the superblock data that doesn't exist.
4) panic.

Solution:
* Added a new semaphore that protects the all_mddevs list.
* Added lock and unlock code around each reference to the list.
* Needed to fix some other related semaphore use.
* Modified md_status so it will check for a null sb ptr. If one is found,
  a message like the following is given:
 md1 : inactive sb

Note: I could very quickly run into the problem before (in about 5
seconds). I ran a few scripts to test the fix: they started and
stopped three independent raid arrays while another script repeatedly read
/proc/mdstat. This ran for over 50,000 total start/stop cycles. For some
reason, one of the raid devices got into the 'D' state. This seems
unrelated to the given fix, since down_interruptible is being used.

Back to other things...
<>< Lance.
--- linux-r16a/drivers/block/md.c       Tue May 11 00:05:30 1999
+++ linux/drivers/block/md.c    Thu Apr 29 23:23:53 1999
@@ -162,6 +162,18 @@
  */
 static MD_LIST_HEAD(all_mddevs);
 
+/*
+ * The all_mddevs_sem must be taken before modifying the all_mddevs list.
+ * It should only be needed when either adding or removing an mddev.
+ * You must NOT have any mddev->reconfig_sem locked while locking
+ * this semaphore.
+ */
+static struct semaphore all_mddevs_sem = MUTEX;
+
+/*
+ * Allocates an mddev structure.
+ * Returns: the pointer to the mddev_t structure which is locked.
+ */
 static mddev_t * alloc_mddev (kdev_t dev)
 {
        mddev_t * mddev;
@@ -186,9 +198,13 @@
         * personalities can create additional mddevs 
         * if necessary.
         */
+       lock_all_mddevs();
+       lock_mddev( mddev );
        add_mddev_mapping(mddev, dev, 0);
        md_list_add(&mddev->all_mddevs, &all_mddevs);
+       unlock_all_mddevs();
 
+       /* NOTE: this mddev is still locked! */
        return mddev;
 }
 
@@ -208,9 +224,14 @@
        while (md_atomic_read(&mddev->recovery_sem.count) != 1)
                schedule();
 
+       unlock_mddev( mddev );
+       lock_all_mddevs();      /* lock the list */
+       lock_mddev( mddev );    /* Just in case we got blocked for all. */
+
        del_mddev_mapping(mddev, MKDEV(MD_MAJOR, mdidx(mddev)));
        md_list_del(&mddev->all_mddevs);
        MD_INIT_LIST_HEAD(&mddev->all_mddevs);
+       unlock_all_mddevs();
        kfree(mddev);
 }
 
@@ -1878,6 +1899,7 @@
                        md_list_del(&rdev->pending);
                        MD_INIT_LIST_HEAD(&rdev->pending);
                }
+               unlock_mddev(mddev);
                autorun_array(mddev);
        }
        printk("... autorun DONE.\n");
@@ -2556,14 +2586,6 @@
                                err = -ENOMEM;
                                goto abort;
                        }
-                       /*
-                        * alloc_mddev() should possibly self-lock.
-                        */
-                       err = lock_mddev(mddev);
-                       if (err) {
-                               printk("ioctl, reason %d, cmd %d\n",err, cmd);
-                               goto abort;
-                       }
                        err = set_array_info(mddev, (void *)arg);
                        goto done_unlock;
 
@@ -3189,6 +3221,7 @@
                }
 
                if (!mddev->pers) {
+                       unlock_mddev( mddev );
                        sz += sprintf(page+sz, "\n");
                        continue;
                }
@@ -3201,9 +3234,12 @@
                        if (md_atomic_read(&mddev->resync_sem.count) != 1)
                                sz += sprintf(page + sz, " resync=DELAYED");
                }
+               unlock_mddev( mddev );
                sz += sprintf(page + sz, "\n");
        }
        sz += status_unused (page + sz);
+
+       unlock_all_mddevs();
 
        return (sz);
 }
--- linux-r16a/include/linux/raid/md_k.h        Tue May 11 00:05:30 1999
+++ linux/include/linux/raid/md_k.h     Thu Apr 29 23:23:52 1999
@@ -294,6 +294,13 @@
        ITERATE_RDEV_GENERIC(pending_raid_disks,pending,rdev,tmp)
 
 /*
+ * It would be better for these to be inline, but all_mddevs_sem is static.
+ * This is the locking mechanism for the all_mddevs list.
+ */
+#define lock_all_mddevs()      down_interruptible( &all_mddevs_sem )
+#define unlock_all_mddevs()    up( &all_mddevs_sem )
+
+/*
  * iterates through all used mddevs in the system.
  */
 #define ITERATE_MDDEV(mddev,tmp)                                       \

Reply via email to