[ Thursday, September 16, 1999 ] James Manning wrote:
> Lingering question:
> - Can the 128 sector count for switching be changed safely?
>
> if so, I'd love to see something in /proc I could echo a new number into
> to tune the disk switching to my particular access patterns...
Since the previous sysctl code had been ripped out, this was pretty
simple: just pulling the code back in from 2.2.11-ac3. I'm hoping that
the sysctl getting ripped out was more for acceptance, since I still
think speed-limit was a good idea, even as a maximum, as it helped make
the array more usable...
More /proc-based tuning settings are always a good thing, I think :)
The kernel boots with the patch below and it appears to work fine... I'll
be doing some bonnie runs as soon as an array frees up to check the
performance implications... I'm hoping to do a split-blocks setting
as well later on, to see whether splitting all block requests
across the two mirrors improves performance or not.
diff -ruN linux-2.3.18_orig/drivers/block/raid1.c linux-2.3.18/drivers/block/raid1.c
--- linux-2.3.18_orig/drivers/block/raid1.c Thu Aug 12 13:16:28 1999
+++ linux-2.3.18/drivers/block/raid1.c Fri Sep 17 08:36:59 1999
@@ -19,6 +19,7 @@
#include <linux/malloc.h>
#include <linux/md.h>
#include <linux/raid1.h>
+#include <linux/sysctl.h>
#include <asm/bitops.h>
#include <asm/atomic.h>
@@ -39,6 +40,35 @@
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#define MIN(a,b) ((a) < (b) ? (a) : (b))
+static int sysctl_raid1_balance_sect_limit = 128; /* read-balancing threshold: the read path switches mirrors once raid_conf->sect_count reaches this value; 128 is the default formerly stored per mirror in sect_limit */
+
+static struct ctl_table_header *md_table_header; /* set by md_register_sysctl(), passed to unregister_sysctl_table() by md_unregister_sysctl() */
+
+static ctl_table md_table[] = { /* leaf entries; referenced as the child table of the "md" directory entry */
+ {DEV_MD_RAID1_BALANCE, "raid1-balance-sect-limit",
+ &sysctl_raid1_balance_sect_limit, sizeof(int), 0644, NULL, &proc_dointvec}, /* one int, mode 0644, read/written through proc_dointvec; NOTE(review): no range check is visible, so zero or negative values appear to be accepted - confirm */
+ {0} /* all-zero final entry (presumably the table terminator) */
+};
+
+static ctl_table md_dir_table[] = { /* the "md" directory level; referenced as the child table of the "dev" entry */
+ {DEV_MD, "md", NULL, 0, 0555, md_table}, /* directory entry: no data pointer, mode 0555, children in md_table */
+ {0} /* all-zero final entry (presumably the table terminator) */
+};
+
+static ctl_table md_root_table[] = { /* top-level table handed to register_sysctl_table() */
+ {CTL_DEV, "dev", NULL, 0, 0555, md_dir_table}, /* directory entry: no data pointer, mode 0555, children in md_dir_table */
+ {0} /* all-zero final entry (presumably the table terminator) */
+};
+
+static void md_register_sysctl(void) { /* Register the dev -> md -> raid1-balance-sect-limit sysctl tree and remember its header. */
+ md_table_header = register_sysctl_table(md_root_table, 1); /* NOTE(review): the result is stored without a NULL check, and the meaning of the second argument (1) is not shown here - confirm both */
+}
+
+
+void md_unregister_sysctl(void) { /* Unregister the table whose header is held in md_table_header. NOTE(review): not static and no caller is visible in this patch - confirm where it is meant to be called. */
+ unregister_sysctl_table(md_table_header); /* md_table_header is passed as-is, without a NULL check */
+}
+
static struct md_personality raid1_personality;
static struct md_thread *raid1_thread = NULL;
struct buffer_head *raid1_retry_list = NULL;
@@ -203,7 +233,8 @@
struct raid1_data *raid_conf = (struct raid1_data *) mddev->private;
struct buffer_head *mirror_bh[MD_SB_DISKS], *bh_req;
struct raid1_bh * r1_bh;
- int n = raid_conf->raid_disks, i, sum_bhs = 0, switch_disks = 0, sectors;
+ int n = raid_conf->raid_disks, i, sum_bhs = 0,
+ switch_disks = 0, sectors_per_blk;
struct mirror_info *mirror;
PRINTK(("raid1_make_request().\n"));
@@ -239,14 +270,14 @@
PRINTK(("raid1_make_request(), read branch.\n"));
mirror = raid_conf->mirrors + last_used;
bh->b_rdev = mirror->dev;
- sectors = bh->b_size >> 9;
- if (bh->b_blocknr * sectors == raid_conf->next_sect) {
- raid_conf->sect_count += sectors;
- if (raid_conf->sect_count >= mirror->sect_limit)
+ sectors_per_blk = bh->b_size >> 9;
+ if (bh->b_blocknr * sectors_per_blk == raid_conf->next_sect) {
+ raid_conf->sect_count += sectors_per_blk;
+ if (raid_conf->sect_count >= sysctl_raid1_balance_sect_limit)
switch_disks = 1;
} else
switch_disks = 1;
- raid_conf->next_sect = (bh->b_blocknr + 1) * sectors;
+ raid_conf->next_sect = (bh->b_blocknr + 1) * sectors_per_blk;
if (switch_disks) {
PRINTK(("read-balancing: switching %d -> %d (%d sectors)\n",
last_used, mirror->next, raid_conf->sect_count));
raid_conf->sect_count = 0;
@@ -486,7 +517,6 @@
mirror->raid_disk=n;
mirror->dev=dev;
mirror->next=0; /* FIXME */
- mirror->sect_limit=128;
mirror->operational=0;
mirror->spare=1;
@@ -752,7 +782,6 @@
raid_conf->mirrors[raid_disk].raid_disk = raid_disk;
raid_conf->mirrors[raid_disk].dev = mddev->devices [i].dev;
raid_conf->mirrors[raid_disk].operational = 1;
- raid_conf->mirrors[raid_disk].sect_limit = 128;
raid_conf->working_disks++;
} else {
/*
@@ -763,7 +792,6 @@
raid_conf->mirrors[raid_disk].number = descriptor->number;
raid_conf->mirrors[raid_disk].raid_disk = raid_disk;
raid_conf->mirrors[raid_disk].dev = mddev->devices [i].dev;
- raid_conf->mirrors[raid_disk].sect_limit = 128;
raid_conf->mirrors[raid_disk].operational = 0;
raid_conf->mirrors[raid_disk].write_only = 0;
@@ -851,6 +879,7 @@
{
if ((raid1_thread = md_register_thread(raid1d, NULL)) == NULL)
return -EBUSY;
+ md_register_sysctl();
return register_md_personality (RAID1, &raid1_personality);
}
diff -ruN linux-2.3.18_orig/drivers/char/Config.in linux-2.3.18/drivers/char/Config.in
--- linux-2.3.18_orig/drivers/char/Config.in Tue Sep 7 17:51:28 1999
+++ linux-2.3.18/drivers/char/Config.in Fri Sep 17 08:37:45 1999
@@ -69,7 +69,7 @@
dep_tristate 'ATIXL busmouse support' CONFIG_ATIXL_BUSMOUSE $CONFIG_BUSMOUSE
dep_tristate 'Logitech busmouse support' CONFIG_LOGIBUSMOUSE $CONFIG_BUSMOUSE
dep_tristate 'Microsoft busmouse support' CONFIG_MS_BUSMOUSE $CONFIG_BUSMOUSE
- if [ "$CONFIG_PPC" = "y" ; then
+ if [ "$CONFIG_PPC" = "y" ]; then
dep_tristate 'Apple Desktop Bus mouse support' CONFIG_ADBMOUSE $CONFIG_BUSMOUSE
fi
fi
diff -ruN linux-2.3.18_orig/include/linux/raid1.h linux-2.3.18/include/linux/raid1.h
--- linux-2.3.18_orig/include/linux/raid1.h Fri May 8 03:17:13 1998
+++ linux-2.3.18/include/linux/raid1.h Fri Sep 17 08:39:16 1999
@@ -8,7 +8,6 @@
int raid_disk;
kdev_t dev;
int next;
- int sect_limit;
/*
* State bits:
@@ -19,7 +18,6 @@
};
struct raid1_data {
- struct md_dev *mddev;
struct mirror_info mirrors[MD_SB_DISKS]; /* RAID1 devices, 2 to
MD_SB_DISKS */
int raid_disks;
int working_disks; /* Number of working disks */
@@ -27,6 +25,9 @@
unsigned long next_sect;
int sect_count;
int resync_running;
+ struct md_dev *mddev; /* rarely used back pointer, so it is moved
+ to the end of the struct to save an address
+ calculation in the common case of accessing mirrors[] */
};
/*
diff -ruN linux-2.3.18_orig/include/linux/sysctl.h linux-2.3.18/include/linux/sysctl.h
--- linux-2.3.18_orig/include/linux/sysctl.h Fri Sep 10 14:06:19 1999
+++ linux-2.3.18/include/linux/sysctl.h Fri Sep 17 08:41:44 1999
@@ -490,7 +490,13 @@
enum {
DEV_CDROM=1,
DEV_HWMON=2,
- DEV_PARPORT=3
+ DEV_PARPORT=3,
+ DEV_MD=4
+};
+
+/* /proc/sys/dev/md */
+enum {
+ DEV_MD_RAID1_BALANCE=1
};
/* /proc/sys/dev/cdrom */