Ok, I wrote a patch that passes the ctl_table pointer of
/proc/sys/dev/md as the param for raid?_init, but noticed that the
existing init functions disagree on their return types (although it
doesn't much matter):

[root@jmm block]# grep raid._init *.c|grep -v return
md.c:void raid0_init (void);
md.c:void raid1_init (void);
md.c:void raid5_init (void);
md.c: raid0_init ();
md.c: raid1_init ();
md.c: raid5_init ();
raid0.c:void raid0_init (void)
raid1.c:int raid1_init (void)
raid5.c:int raid5_init (void)

So my plan was that md.c could handle /proc/sys/dev/md/<variable>,
and each of the individual personalities would get to have
/proc/sys/dev/md/<personality>/<variable>. This may or may not ideally
solve the contrived situation I came up with, but it a) allows much more
customization than the current code, b) keeps me from having to either
duplicate the sysctl stuff in both md.c and raid1.c or do something
nastier, and c) is about all I could see doing sanely at boot/module
load time.

Since it looked like at least a couple of the *_init functions were
trying to use their return values for error handling, I left the return
values alone and instead passed in the location of the table pointer
they need to update. This way they can simply ignore it, and it'll stay
the original NULL, or, if they wish, they can construct a table and
store its address there.

If there's already some laid-out plan for how the sysctl stuff
would end up evolving, please let me know :)

This patch is against 2.2.11 with the raid patch already applied.
If someone could try this out, that'd be great, as I won't have a
chance to test it myself for a little while. Something like:

   for i in 4 8 16 32 64 128; do
      echo $i > /proc/sys/dev/md/raid1/balance
      bonnie -s SOME_SIZE -m balance-$i | tee -a /tmp/bonnie.results
      bonnie -s SOME_SIZE -m balance-$i | tee -a /tmp/bonnie.results
      bonnie -s SOME_SIZE -m balance-$i | tee -a /tmp/bonnie.results
   done

James

diff --exclude-from=/tmp/x -ruN linux-2.2.11_raid/drivers/block/hsm.c 
linux-2.2.11/drivers/block/hsm.c
--- linux-2.2.11_raid/drivers/block/hsm.c       Sat Sep 18 17:33:52 1999
+++ linux-2.2.11/drivers/block/hsm.c    Sat Sep 18 16:29:22 1999
@@ -787,7 +787,7 @@
 
 #ifndef MODULE
 
-md__initfunc(void hsm_init (void))
+md__initfunc(void hsm_init (struct ctl_table **table_loc))
 {
        register_md_personality (HSM, &hsm_personality);
 }
diff --exclude-from=/tmp/x -ruN linux-2.2.11_raid/drivers/block/linear.c 
linux-2.2.11/drivers/block/linear.c
--- linux-2.2.11_raid/drivers/block/linear.c    Sat Sep 18 17:33:52 1999
+++ linux-2.2.11/drivers/block/linear.c Sat Sep 18 16:29:29 1999
@@ -203,7 +203,7 @@
 
 #ifndef MODULE
 
-md__initfunc(void linear_init (void))
+md__initfunc(void linear_init (struct ctl_table **table_loc))
 {
        register_md_personality (LINEAR, &linear_personality);
 }
diff --exclude-from=/tmp/x -ruN linux-2.2.11_raid/drivers/block/md.c 
linux-2.2.11/drivers/block/md.c
--- linux-2.2.11_raid/drivers/block/md.c        Sat Sep 18 17:33:52 1999
+++ linux-2.2.11/drivers/block/md.c     Sat Sep 18 16:57:31 1999
@@ -92,22 +92,50 @@
 
 static struct ctl_table_header *md_table_header;
 
+/*
+ * Make sure to plug in the final ctl_table locations before md_register_sysctl()
+ */
 static ctl_table md_table[] = {
+#ifdef CONFIG_MD_HSM
+       {DEV_MD_HSM, "hsm", NULL, 0, 0555, NULL},
+#endif
+#ifdef CONFIG_MD_TRANSLUCENT
+       {DEV_MD_TRANSLUCENT, "translucent", NULL, 0, 0555, NULL},
+#endif
+#ifdef CONFIG_MD_LINEAR
+       {DEV_MD_LINEAR, "linear", NULL, 0, 0555, NULL},
+#endif
+#ifdef CONFIG_MD_STRIPED
+       {DEV_MD_RAID0, "raid0", NULL, 0, 0555, NULL},
+#endif
+#ifdef CONFIG_MD_MIRRORING
+       {DEV_MD_RAID1, "raid1", NULL, 0, 0555, NULL},
+#endif
+#ifdef CONFIG_MD_RAID5
+       {DEV_MD_RAID5, "raid5", NULL, 0, 0555, NULL},
+#endif
        {DEV_MD_SPEED_LIMIT, "speed-limit",
         &sysctl_speed_limit, sizeof(int), 0644, NULL, &proc_dointvec},
        {0}
 };
 
 static ctl_table md_dir_table[] = {
-        {DEV_MD, "md", NULL, 0, 0555, md_table},
-        {0}
+       {DEV_MD, "md", NULL, 0, 0555, md_table},
+       {0}
 };
 
 static ctl_table md_root_table[] = {
-        {CTL_DEV, "dev", NULL, 0, 0555, md_dir_table},
-        {0}
+       {CTL_DEV, "dev", NULL, 0, 0555, md_dir_table},
+       {0}
 };
 
+
+/* 
+ * Do the md_table construction at register_sysctl time so the 
+ * various _init functions will have constructed their ctl_tables
+ * and passed their locations back
+ */
+
 static void md_register_sysctl(void)
 {
         md_table_header = register_sysctl_table(md_root_table, 1);
@@ -3907,12 +3935,12 @@
 }
 #endif
 
-void hsm_init (void);
-void translucent_init (void);
-void linear_init (void);
-void raid0_init (void);
-void raid1_init (void);
-void raid5_init (void);
+int hsm_init (struct ctl_table **);
+int translucent_init (struct ctl_table **);
+int linear_init (struct ctl_table **);
+int raid0_init (struct ctl_table **);
+int raid1_init (struct ctl_table **);
+int raid5_init (struct ctl_table **);
 
 md__initfunc(int md_init (void))
 {
@@ -3940,26 +3968,39 @@
                printk(KERN_ALERT "bug: couldn't allocate md_recovery_thread\n");
 
        md_register_reboot_notifier(&md_notifier);
-       md_register_sysctl();
 
+/*
+ * FIXME: Look into just creating the md_table dynamically
+ *        instead of static initialization, as this is heavily
+ *        dependent on initial ordering in md_table
+ *        
+ *        Also note that we aren't handling any of the error
+ *        codes that the _init functions may be returning
+ */
 #ifdef CONFIG_MD_HSM
-       hsm_init ();
+       hsm_init (&md_table[0].child);
 #endif
 #ifdef CONFIG_MD_TRANSLUCENT
-       translucent_init ();
+       translucent_init (&md_table[1].child);
 #endif
 #ifdef CONFIG_MD_LINEAR
-       linear_init ();
+       linear_init (&md_table[2].child);
 #endif
 #ifdef CONFIG_MD_STRIPED
-       raid0_init ();
+       raid0_init (&md_table[3].child);
 #endif
 #ifdef CONFIG_MD_MIRRORING
-       raid1_init ();
+       raid1_init (&md_table[4].child);
 #endif
 #ifdef CONFIG_MD_RAID5
-       raid5_init ();
+       raid5_init (&md_table[5].child);
 #endif
+
+       /*
+        * make sure we've done the init calls for each personality so
+        * the various ctl_tables have been constructed and returned
+        */
+       md_register_sysctl();
 #if defined(CONFIG_MD_RAID5) || defined(CONFIG_MD_RAID5_MODULE)
         /*
          * pick a XOR routine, runtime.
diff --exclude-from=/tmp/x -ruN linux-2.2.11_raid/drivers/block/raid0.c 
linux-2.2.11/drivers/block/raid0.c
--- linux-2.2.11_raid/drivers/block/raid0.c     Sat Sep 18 17:33:52 1999
+++ linux-2.2.11/drivers/block/raid0.c  Sat Sep 18 17:03:44 1999
@@ -334,9 +334,9 @@
 
 #ifndef MODULE
 
-void raid0_init (void)
+int raid0_init (struct ctl_table **table_loc)
 {
-       register_md_personality (RAID0, &raid0_personality);
+       return register_md_personality (RAID0, &raid0_personality);
 }
 
 #else
diff --exclude-from=/tmp/x -ruN linux-2.2.11_raid/drivers/block/raid1.c 
linux-2.2.11/drivers/block/raid1.c
--- linux-2.2.11_raid/drivers/block/raid1.c     Sat Sep 18 17:33:52 1999
+++ linux-2.2.11/drivers/block/raid1.c  Sat Sep 18 16:57:58 1999
@@ -30,6 +30,7 @@
 
 static mdk_personality_t raid1_personality;
 struct buffer_head *raid1_retry_list = NULL;
+static int sysctl_raid1_read_balance = MAX_LINEAR_SECTORS;
 
 static void * raid1_kmalloc (int size)
 {
@@ -169,7 +170,7 @@
 }
 
 /*
- * This routine checks if the undelying device is an md device
+ * This routine checks if the underlying device is an md device
  * and in that case it maps the blocks before putting the
  * request on the queue
  */
@@ -244,7 +245,7 @@
 
                if (bh->b_blocknr * sectors == conf->next_sect) {
                        conf->sect_count += sectors;
-                       if (conf->sect_count >= mirror->sect_limit)
+                       if (conf->sect_count > sysctl_raid1_read_balance)
                                switch_disks = 1;
                } else
                        switch_disks = 1;
@@ -970,7 +971,6 @@
                        disk->number = descriptor->number;
                        disk->raid_disk = disk_idx;
                        disk->dev = rdev->dev;
-                       disk->sect_limit = MAX_LINEAR_SECTORS;
                        disk->operational = 0;
                        disk->write_only = 0;
                        disk->spare = 0;
@@ -1001,7 +1001,6 @@
                        disk->number = descriptor->number;
                        disk->raid_disk = disk_idx;
                        disk->dev = rdev->dev;
-                       disk->sect_limit = MAX_LINEAR_SECTORS;
                        disk->operational = 1;
                        disk->write_only = 0;
                        disk->spare = 0;
@@ -1015,7 +1014,6 @@
                        disk->number = descriptor->number;
                        disk->raid_disk = disk_idx;
                        disk->dev = rdev->dev;
-                       disk->sect_limit = MAX_LINEAR_SECTORS;
                        disk->operational = 0;
                        disk->write_only = 0;
                        disk->spare = 1;
@@ -1216,8 +1214,15 @@
        raid1_restart_resync
 };
 
-int raid1_init (void)
+int raid1_init (struct ctl_table **table_loc)
 {
+       static ctl_table raid1_table[] = {
+               {DEV_MD_RAID1_BALANCE, "balance",
+                &sysctl_raid1_read_balance, sizeof(int), 0644, NULL, &proc_dointvec},
+               {0}
+       };
+       (*table_loc)=raid1_table;
+
        return register_md_personality (RAID1, &raid1_personality);
 }
 
diff --exclude-from=/tmp/x -ruN linux-2.2.11_raid/drivers/block/raid5.c 
linux-2.2.11/drivers/block/raid5.c
--- linux-2.2.11_raid/drivers/block/raid5.c     Sat Sep 18 17:33:52 1999
+++ linux-2.2.11/drivers/block/raid5.c  Sat Sep 18 16:29:46 1999
@@ -2077,7 +2077,7 @@
        raid5_restart_resync
 };
 
-int raid5_init (void)
+int raid5_init (struct ctl_table **table_loc)
 {
        int err;
 
diff --exclude-from=/tmp/x -ruN linux-2.2.11_raid/drivers/block/translucent.c 
linux-2.2.11/drivers/block/translucent.c
--- linux-2.2.11_raid/drivers/block/translucent.c       Sat Sep 18 17:33:52 1999
+++ linux-2.2.11/drivers/block/translucent.c    Sat Sep 18 16:29:09 1999
@@ -115,7 +115,7 @@
 
 #ifndef MODULE
 
-md__initfunc(void translucent_init (void))
+md__initfunc(void translucent_init (struct ctl_table **table_loc))
 {
        register_md_personality (TRANSLUCENT, &translucent_personality);
 }
diff --exclude-from=/tmp/x -ruN linux-2.2.11_raid/include/linux/sysctl.h 
linux-2.2.11/include/linux/sysctl.h
--- linux-2.2.11_raid/include/linux/sysctl.h    Sat Sep 18 17:33:52 1999
+++ linux-2.2.11/include/linux/sysctl.h Sat Sep 18 16:47:48 1999
@@ -436,7 +436,18 @@
 
 /* /proc/sys/dev/md */
 enum {
-       DEV_MD_SPEED_LIMIT=1
+       DEV_MD_SPEED_LIMIT=1,
+       DEV_MD_HSM=2,
+       DEV_MD_TRANSLUCENT=3,
+       DEV_MD_LINEAR=4,
+       DEV_MD_RAID0=5,
+       DEV_MD_RAID1=6,
+       DEV_MD_RAID5=7
+};
+
+/* /proc/sys/dev/md/raid1 */
+enum {
+       DEV_MD_RAID1_BALANCE=1
 };
 
 #ifdef __KERNEL__

Reply via email to