Linus,
I wonder if you would consider applying, or commenting on this patch.
It adds support for partitioning md devices. In particular, a new
major device is created (name=="mdp", number assigned dynamically)
which provides for 15 partitions on each of the first 16 md devices.
I understand that a more uniform approach to partitioning might get
introduced in 2.5, but this seems the best approach for 2.4.
This is particularly useful if you want to have a mirrored boot
drive, rather than two drives with lots of mirrored partitions.
It is also useful for supporting what I call "winware raid", which is
the raid-controller equivalent of "winmodems" - minimal hardware and
most of the support done in software.
Among the things that this patch does are:
1/ tidy up some terminology. Currently there is a one-to-one
mapping between minor numbers and raid arrays or "units", so the
term "minor" is used when referring to either a real minor number or
to a unit.
This patch introduces the term "unit" to be used to identify which
particular array is being referred to, and keeps "minor" just for
when a minor device number is really implied.
2/ When reporting the geometry of a partitioned raid1 array, the
geometry of the underlying device is reported. For all other arrays
the 2x4xLARGE geometry is maintained.
3/ The hardsectsize of partitions in a RAID5 array is set to the
PAGESIZE because raid5 doesn't cope well with receiving requests
with different blocksizes.
4/ The new device reports a name of "md" (via hd_struct->major_name)
so partitions look like mda3 or md/disc0/part3, but registers the
name "mdp" so that /proc/devices shows the major number next to
"mdp".
5/ Adds ioctls for re-reading the partition table and setting
partition table information.
--- ./include/linux/raid/md.h 2001/07/01 22:59:38 1.1
+++ ./include/linux/raid/md.h 2001/07/01 22:59:47 1.2
@@ -61,8 +61,11 @@
extern int md_size[MAX_MD_DEVS];
extern struct hd_struct md_hd_struct[MAX_MD_DEVS];
-extern void add_mddev_mapping (mddev_t *mddev, kdev_t dev, void *data);
-extern void del_mddev_mapping (mddev_t *mddev, kdev_t dev);
+extern int mdp_size[MAX_MDP_DEVS<<MDP_MINOR_SHIFT];
+extern struct hd_struct mdp_hd_struct[MAX_MDP_DEVS<<MDP_MINOR_SHIFT];
+
+extern void add_mddev_mapping (mddev_t *mddev, int unit, void *data);
+extern void del_mddev_mapping (mddev_t *mddev, int unit);
extern char * partition_name (kdev_t dev);
extern int register_md_personality (int p_num, mdk_personality_t *p);
extern int unregister_md_personality (int p_num);
--- ./include/linux/raid/md_k.h 2001/07/01 22:59:38 1.1
+++ ./include/linux/raid/md_k.h 2001/07/01 22:59:47 1.2
@@ -15,6 +15,7 @@
#ifndef _MD_K_H
#define _MD_K_H
+
#define MD_RESERVED 0UL
#define LINEAR 1UL
#define STRIPED 2UL
@@ -60,7 +61,10 @@
#error MD doesnt handle bigger kdev yet
#endif
+#define MDP_MINOR_SHIFT 4
+
#define MAX_MD_DEVS (1<<MINORBITS) /* Max number of md dev */
+#define MAX_MDP_DEVS (1<<(MINORBITS-MDP_MINOR_SHIFT)) /* Max number of md dev */
/*
* Maps a kdev to an mddev/subdev. How 'data' is handled is up to
@@ -73,11 +77,17 @@
extern dev_mapping_t mddev_map [MAX_MD_DEVS];
+extern int mdp_major;
static inline mddev_t * kdev_to_mddev (kdev_t dev)
{
- if (MAJOR(dev) != MD_MAJOR)
+ int unit=0;
+ if (MAJOR(dev) == MD_MAJOR)
+ unit = MINOR(dev);
+ else if (MAJOR(dev) == mdp_major)
+ unit = MINOR(dev) >> MDP_MINOR_SHIFT;
+ else
BUG();
- return mddev_map[MINOR(dev)].mddev;
+ return mddev_map[unit].mddev;
}
/*
@@ -191,7 +201,7 @@
{
void *private;
mdk_personality_t *pers;
- int __minor;
+ int __unit;
mdp_super_t *sb;
int nb_dev;
struct md_list_head disks;
@@ -248,13 +258,34 @@
*/
static inline int mdidx (mddev_t * mddev)
{
- return mddev->__minor;
+ return mddev->__unit;
+}
+
+static inline int mdminor (mddev_t *mddev)
+{
+ return mdidx(mddev);
+}
+
+static inline int mdpminor (mddev_t *mddev)
+{
+ return mdidx(mddev)<< MDP_MINOR_SHIFT;
+}
+
+static inline kdev_t md_kdev (mddev_t *mddev)
+{
+ return MKDEV(MD_MAJOR, mdminor(mddev));
}
-static inline kdev_t mddev_to_kdev(mddev_t * mddev)
+static inline kdev_t mdp_kdev (mddev_t *mddev, int part)
{
- return MKDEV(MD_MAJOR, mdidx(mddev));
+ return MKDEV(mdp_major, mdpminor(mddev)+part);
}
+
+#define foreach_part(tmp,mddev) \
+ if (mdidx(mddev)<MAX_MDP_DEVS) \
+ for(tmp=mdpminor(mddev); \
+ tmp<mdpminor(mddev)+(1<<MDP_MINOR_SHIFT); \
+ tmp++)
extern mdk_rdev_t * find_rdev(mddev_t * mddev, kdev_t dev);
extern mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr);
--- ./drivers/md/md.c 2001/07/01 22:58:53 1.3
+++ ./drivers/md/md.c 2001/07/01 22:59:47 1.4
@@ -110,9 +110,17 @@
static int md_blocksizes[MAX_MD_DEVS];
static int md_hardsect_sizes[MAX_MD_DEVS];
static int md_maxreadahead[MAX_MD_DEVS];
+int md_size[MAX_MD_DEVS];
+
+int mdp_major;
+struct hd_struct mdp_hd_struct[MAX_MDP_DEVS<<MDP_MINOR_SHIFT];
+static int mdp_blocksizes[MAX_MDP_DEVS<<MDP_MINOR_SHIFT];
+static int mdp_hardsect_sizes[MAX_MDP_DEVS<<MDP_MINOR_SHIFT];
+static int mdp_maxreadahead[MAX_MDP_DEVS<<MDP_MINOR_SHIFT];
+int mdp_size[MAX_MDP_DEVS<<MDP_MINOR_SHIFT];
+
static mdk_thread_t *md_recovery_thread;
-int md_size[MAX_MD_DEVS];
extern struct block_device_operations md_fops;
static devfs_handle_t devfs_handle;
@@ -130,6 +138,18 @@
next: NULL,
fops: &md_fops,
};
+static struct gendisk mdp_gendisk=
+{
+ major_name: "md",
+ minor_shift: MDP_MINOR_SHIFT,
+ max_p: 1<<MDP_MINOR_SHIFT,
+ part: mdp_hd_struct,
+ sizes: mdp_size,
+ nr_real: MAX_MDP_DEVS,
+ real_devices: NULL,
+ next: NULL,
+ fops: &md_fops,
+};
/*
* Enables to iterate over all existing md arrays
@@ -143,65 +163,53 @@
*/
dev_mapping_t mddev_map[MAX_MD_DEVS];
-void add_mddev_mapping (mddev_t * mddev, kdev_t dev, void *data)
+void add_mddev_mapping (mddev_t * mddev, int unit, void *data)
{
- unsigned int minor = MINOR(dev);
-
- if (MAJOR(dev) != MD_MAJOR) {
+ if (mddev_map[unit].mddev != NULL) {
MD_BUG();
return;
}
- if (mddev_map[minor].mddev != NULL) {
- MD_BUG();
- return;
- }
- mddev_map[minor].mddev = mddev;
- mddev_map[minor].data = data;
+ mddev_map[unit].mddev = mddev;
+ mddev_map[unit].data = data;
}
-void del_mddev_mapping (mddev_t * mddev, kdev_t dev)
+void del_mddev_mapping (mddev_t * mddev, int unit)
{
- unsigned int minor = MINOR(dev);
-
- if (MAJOR(dev) != MD_MAJOR) {
- MD_BUG();
- return;
- }
- if (mddev_map[minor].mddev != mddev) {
+ if (mddev_map[unit].mddev != mddev) {
MD_BUG();
return;
}
- mddev_map[minor].mddev = NULL;
- mddev_map[minor].data = NULL;
+ mddev_map[unit].mddev = NULL;
+ mddev_map[unit].data = NULL;
}
static int md_make_request (request_queue_t *q, int rw, struct buffer_head * bh)
{
+ unsigned int minor = MINOR(bh->b_rdev);
mddev_t *mddev = kdev_to_mddev(bh->b_rdev);
- if (mddev && mddev->pers)
+ if (mddev && mddev->pers) {
+ if (MAJOR(bh->b_rdev) == mdp_major)
+ /* map the sector for partitions */
+ bh->b_rsector += mdp_hd_struct[minor].start_sect;
return mddev->pers->make_request(mddev, rw, bh);
- else {
+ } else {
buffer_IO_error(bh);
return 0;
}
}
-static mddev_t * alloc_mddev (kdev_t dev)
+static mddev_t * alloc_mddev (int unit)
{
mddev_t *mddev;
- if (MAJOR(dev) != MD_MAJOR) {
- MD_BUG();
- return 0;
- }
mddev = (mddev_t *) kmalloc(sizeof(*mddev), GFP_KERNEL);
if (!mddev)
return NULL;
memset(mddev, 0, sizeof(*mddev));
- mddev->__minor = MINOR(dev);
+ mddev->__unit = unit;
init_MUTEX(&mddev->reconfig_sem);
init_MUTEX(&mddev->recovery_sem);
init_MUTEX(&mddev->resync_sem);
@@ -214,7 +222,7 @@
* personalities can create additional mddevs
* if necessary.
*/
- add_mddev_mapping(mddev, dev, 0);
+ add_mddev_mapping(mddev, unit, 0);
md_list_add(&mddev->all_mddevs, &all_mddevs);
MOD_INC_USE_COUNT;
@@ -340,7 +348,7 @@
ITERATE_RDEV(mddev,rdev,tmp) {
rdev->size &= mask;
- md_size[mdidx(mddev)] += rdev->size;
+ md_size[mdminor(mddev)] += rdev->size;
}
return 0;
}
@@ -739,14 +747,19 @@
static void free_mddev (mddev_t *mddev)
{
+ int part;
if (!mddev) {
MD_BUG();
return;
}
export_array(mddev);
- md_size[mdidx(mddev)] = 0;
- md_hd_struct[mdidx(mddev)].nr_sects = 0;
+ md_size[mdminor(mddev)] = 0;
+ md_hd_struct[mdminor(mddev)].nr_sects = 0;
+ foreach_part(part, mddev) {
+ mdp_size[part] = 0;
+ mdp_hd_struct[part].nr_sects = 0;
+ }
/*
* Make sure nobody else is using this mddev
@@ -757,7 +770,7 @@
while (md_atomic_read(&mddev->recovery_sem.count) != 1)
schedule();
- del_mddev_mapping(mddev, MKDEV(MD_MAJOR, mdidx(mddev)));
+ del_mddev_mapping(mddev, mdidx(mddev));
md_list_del(&mddev->all_mddevs);
MD_INIT_LIST_HEAD(&mddev->all_mddevs);
kfree(mddev);
@@ -1521,9 +1534,10 @@
printk (UNKNOWN_LEVEL, mdidx(mddev), sb->level);
goto abort;
}
- if (!md_size[mdidx(mddev)])
- md_size[mdidx(mddev)] = sb->size * data_disks;
-
+ if (!md_size[mdminor(mddev)])
+ md_size[mdminor(mddev)] = sb->size * data_disks;
+ mdp_size[mdpminor(mddev)] = md_size[mdminor(mddev)];
+
readahead = MD_READAHEAD;
if ((sb->level == 0) || (sb->level == 4) || (sb->level == 5)) {
readahead = (mddev->sb->chunk_size>>PAGE_SHIFT) * 4 * data_disks;
@@ -1533,7 +1547,8 @@
if (sb->level == -3)
readahead = 0;
}
- md_maxreadahead[mdidx(mddev)] = readahead;
+ md_maxreadahead[mdminor(mddev)] = readahead;
+ mdp_maxreadahead[mdpminor(mddev)] = readahead;
printk(KERN_INFO "md%d: max total readahead window set to %ldk\n",
mdidx(mddev), readahead*(PAGE_SIZE/1024));
@@ -1562,6 +1577,8 @@
int chunk_size;
struct md_list_head *tmp;
mdk_rdev_t *rdev;
+ int part,minor,pminor;
+ int blksize, sectsize;
if (!mddev->nb_dev) {
@@ -1576,7 +1593,9 @@
* Resize disks to align partitions size on a given
* chunk size.
*/
- md_size[mdidx(mddev)] = 0;
+ minor = mdminor(mddev);
+ pminor = mdpminor(mddev);
+ md_size[minor] = 0;
/*
* Analyze all RAID superblock(s)
@@ -1644,21 +1663,42 @@
* device.
* Also find largest hardsector size
*/
- md_hardsect_sizes[mdidx(mddev)] = 512;
+ sectsize = 512;
ITERATE_RDEV(mddev,rdev,tmp) {
if (rdev->faulty)
continue;
invalidate_device(rdev->dev, 1);
if (get_hardsect_size(rdev->dev)
- > md_hardsect_sizes[mdidx(mddev)])
- md_hardsect_sizes[mdidx(mddev)] =
+ > sectsize)
+ sectsize =
get_hardsect_size(rdev->dev);
}
- md_blocksizes[mdidx(mddev)] = 1024;
- if (md_blocksizes[mdidx(mddev)] < md_hardsect_sizes[mdidx(mddev)])
- md_blocksizes[mdidx(mddev)] = md_hardsect_sizes[mdidx(mddev)];
+
+ blksize = 1024;
+ if (blksize < sectsize)
+ blksize = sectsize;
mddev->pers = pers[pnum];
+ md_hardsect_sizes[minor] = sectsize;
+ md_blocksizes[minor] = blksize;
+
+ foreach_part(part, mddev) {
+ mdp_hardsect_sizes[part] = sectsize;
+ mdp_blocksizes[part] = blksize;
+
+ /* When partitioned, raid5 really must use page size blocks
+ * or the stripe cache gets confused
+ * However, setting hardsect_size for the whole disc
+ * device confuses partitioning tools.
+ * So we just change it for remaining partitions
+ */
+ if (pnum == RAID5) {
+ sectsize = PAGE_SIZE;
+ blksize = PAGE_SIZE;
+ }
+ }
+
+
err = mddev->pers->run(mddev);
if (err) {
printk("md: pers->run() failed ...\n");
@@ -1673,12 +1713,18 @@
* md_size has units of 1K blocks, which are
* twice as large as sectors.
*/
- md_hd_struct[mdidx(mddev)].start_sect = 0;
- register_disk(&md_gendisk, MKDEV(MAJOR_NR,mdidx(mddev)),
- 1, &md_fops, md_size[mdidx(mddev)]<<1);
-
+ md_hd_struct[minor].start_sect = 0;
read_ahead[MD_MAJOR] = 1024;
- return (0);
+ read_ahead[mdp_major] = 1024;
+
+ register_disk(&md_gendisk, md_kdev(mddev),
+ 1, &md_fops, md_size[minor]<<1);
+ if (mdidx(mddev) < MAX_MDP_DEVS)
+ register_disk(&mdp_gendisk, mdp_kdev(mddev,0),
+ 1 << MDP_MINOR_SHIFT, &md_fops,
+ mdp_size[minor] << 1);
+
+ return 0;
}
#undef TOO_BIG_CHUNKSIZE
@@ -1689,6 +1735,7 @@
static int restart_array (mddev_t *mddev)
{
int err = 0;
+ int part;
/*
* Complain if it has no devices
@@ -1701,7 +1748,9 @@
OUT(-EBUSY);
mddev->ro = 0;
- set_device_ro(mddev_to_kdev(mddev), 0);
+ set_device_ro(md_kdev(mddev), 0);
+ foreach_part(part, mddev)
+ set_device_ro(MKDEV(mdp_major,part), 0);
printk (KERN_INFO
"md: md%d switched to read-write mode.\n", mdidx(mddev));
@@ -1726,7 +1775,7 @@
static int do_md_stop (mddev_t * mddev, int ro)
{
int err = 0, resync_interrupted = 0;
- kdev_t dev = mddev_to_kdev(mddev);
+ int part;
if (atomic_read(&mddev->active)>1) {
printk(STILL_IN_USE, mdidx(mddev));
@@ -1755,22 +1804,37 @@
down(&mddev->recovery_sem);
up(&mddev->recovery_sem);
- invalidate_device(dev, 1);
+ invalidate_device(md_kdev(mddev), 1);
+ foreach_part(part, mddev) {
+ kdev_t pdev = MKDEV(mdp_major, part);
+ invalidate_device(pdev, 1);
+ }
if (ro) {
if (mddev->ro)
OUT(-ENXIO);
mddev->ro = 1;
} else {
- if (mddev->ro)
- set_device_ro(dev, 0);
+ if (mddev->ro) {
+ set_device_ro(md_kdev(mddev), 0);
+ foreach_part(part, mddev)
+ set_device_ro(MKDEV(mdp_major, part), 0);
+ }
if (mddev->pers->stop(mddev)) {
- if (mddev->ro)
- set_device_ro(dev, 1);
+ if (mddev->ro) {
+ set_device_ro(md_kdev(mddev), 1);
+ foreach_part(part, mddev)
+ set_device_ro(MKDEV(mdp_major, part),
+1);
+ }
OUT(-EBUSY);
}
if (mddev->ro)
mddev->ro = 0;
+ /* unregister from devfs */
+ if (mdidx(mddev) < MAX_MDP_DEVS)
+ devfs_register_partitions(&mdp_gendisk,
+ mdpminor(mddev),
+ 1);
}
if (mddev->sb) {
/*
@@ -1783,8 +1847,11 @@
}
md_update_sb(mddev);
}
- if (ro)
- set_device_ro(dev, 1);
+ if (ro) {
+ set_device_ro(md_kdev(mddev),1);
+ foreach_part(part, mddev)
+ set_device_ro(MKDEV(mdp_major,part), 1);
+ }
}
/*
@@ -1864,8 +1931,7 @@
struct md_list_head *tmp;
mdk_rdev_t *rdev0, *rdev;
mddev_t *mddev;
- kdev_t md_kdev;
-
+ int unit;
printk("md: autorun ...\n");
while (pending_raid_disks.next != &pending_raid_disks) {
@@ -1890,8 +1956,8 @@
* mostly sane superblocks. It's time to allocate the
* mddev.
*/
- md_kdev = MKDEV(MD_MAJOR, rdev0->sb->md_minor);
- mddev = kdev_to_mddev(md_kdev);
+ unit = rdev0->sb->md_minor;
+ mddev = mddev_map[unit].mddev;
if (mddev) {
printk("md: md%d already running, cannot run %s\n",
mdidx(mddev), partition_name(rdev0->dev));
@@ -1899,12 +1965,12 @@
export_rdev(rdev);
continue;
}
- mddev = alloc_mddev(md_kdev);
+ mddev = alloc_mddev(unit);
if (mddev == NULL) {
printk("md: cannot allocate memory for md drive.\n");
break;
}
- if (md_kdev == countdev)
+ if (unit == countdev)
atomic_inc(&mddev->active);
printk("md: created md%d\n", mdidx(mddev));
ITERATE_RDEV_GENERIC(candidates,pending,rdev,tmp) {
@@ -2389,6 +2455,11 @@
SET_SB(nr_disks);
SET_SB(raid_disks);
SET_SB(md_minor);
+ /* I want to trust the unit number of mddev more than
+ * md_minorfrom info - neilb
+ */
+ mddev->sb->md_minor = mdidx(mddev);
+
SET_SB(not_persistent);
SET_SB(state);
@@ -2448,11 +2519,59 @@
{
int ret;
- fsync_dev(mddev_to_kdev(mddev));
ret = md_error(mddev, dev);
return ret;
}
+static int md_reread_partition(mddev_t *mddev)
+{
+ /* copied from ide_revalidate_disk */
+ int minor = mdpminor(mddev);
+ int part;
+ if (mdidx(mddev) >= MAX_MDP_DEVS)
+ return 0;
+ foreach_part(part, mddev) {
+ if (mdp_hd_struct[part].nr_sects > 0) {
+ kdev_t dev = MKDEV(mdp_major, part);
+
+ invalidate_device(dev, 1);
+
+ if (mdp_hardsect_sizes[part] < 1024)
+ set_blocksize(dev, 1024);
+ else
+ set_blocksize(dev, mdp_hardsect_sizes[part]);
+ }
+ mdp_hd_struct[part].start_sect= 0;
+ mdp_hd_struct[part].nr_sects = 0;
+ }
+ /* and from revalidate_scsidisk.. */
+ grok_partitions(&mdp_gendisk, minor >> MDP_MINOR_SHIFT, 1<<MDP_MINOR_SHIFT,
+ mdp_size[minor]<<1);
+ return 0;
+}
+
+static int get_geo(kdev_t dev, int *heads, int *sectors)
+{
+ struct hd_geometry geom;
+ struct block_device *bdev;
+ int rv;
+
+ bdev = bdget(dev);
+ if (!bdev)
+ return -1;
+ rv = blkdev_get(bdev, FMODE_READ, 0, BDEV_FILE);
+ if (rv == 0) {
+ rv = ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)&geom);
+ blkdev_put(bdev, BDEV_FILE);
+ }
+ if (rv == 0) {
+ *heads = geom.heads;
+ *sectors = geom.sectors;
+ }
+ bdput(bdev);
+ return rv;
+}
+
static int md_ioctl (struct inode *inode, struct file *file,
unsigned int cmd, unsigned long arg)
{
@@ -2461,15 +2580,26 @@
struct hd_geometry *loc = (struct hd_geometry *) arg;
mddev_t *mddev = NULL;
kdev_t dev;
+ int unit=0;
+ struct hd_struct *hdp = NULL;
if (!md_capable_admin())
return -EACCES;
dev = inode->i_rdev;
minor = MINOR(dev);
- if (minor >= MAX_MD_DEVS)
+ if (( MAJOR(dev) == MD_MAJOR && minor >= MAX_MD_DEVS) ||
+ ( MAJOR(dev) == mdp_major && minor >= (MAX_MDP_DEVS<<MDP_MINOR_SHIFT)))
return -EINVAL;
+ if (MAJOR(dev) == MD_MAJOR) {
+ unit = MINOR(dev);
+ hdp = &md_hd_struct[minor];
+ } else if (MAJOR(dev) == mdp_major) {
+ unit = MINOR(dev) >> MDP_MINOR_SHIFT;
+ hdp = &mdp_hd_struct[minor];
+ } else
+ BUG();
/*
* Commands dealing with the RAID driver but not any
* particular array:
@@ -2497,8 +2627,9 @@
err = -EINVAL;
goto abort;
}
- err = md_put_user(md_hd_struct[minor].nr_sects,
- (long *) arg);
+ err = md_put_user(hdp->nr_sects,
+ (long *) arg);
+
goto done;
case BLKFLSBUF:
@@ -2546,7 +2677,7 @@
switch (cmd)
{
case SET_ARRAY_INFO:
- mddev = alloc_mddev(dev);
+ mddev = alloc_mddev(unit);
if (!mddev) {
err = -ENOMEM;
goto abort;
@@ -2586,7 +2717,7 @@
/*
* possibly make it lock the array ...
*/
- err = autostart_array((kdev_t)arg, dev);
+ err = autostart_array((kdev_t)arg, unit);
if (err) {
printk("md: autostart %s failed!\n",
partition_name((kdev_t)arg));
@@ -2642,30 +2773,83 @@
err = do_md_stop (mddev, 1);
goto done_unlock;
- /*
- * We have a problem here : there is no easy way to give a CHS
- * virtual geometry. We currently pretend that we have a 2 heads
- * 4 sectors (with a BIG number of cylinders...). This drives
- * dosfs just mad... ;-)
- */
+ /*
+ * The geometry of a raid array is not necessarily well
+ * defined. There aren't always heads, sectors, and cylinders.
+ * The geometry is only really useful for fdisk and similar
+ * tools to align paritions.
+ * A a raid1 array, presenting the geometry of the underlying
+ * devices can be useful when partitioning. For
+ * all other arrays, including raid1 when not partitioning, we
+just
+ * give 2 heads, 4 sectors, and lots of cylinders.
+ *
+ */
case HDIO_GETGEO:
+ {
+ int sectors, heads;
if (!loc) {
err = -EINVAL;
goto abort_unlock;
}
- err = md_put_user (2, (char *) &loc->heads);
+ if (MAJOR(dev) == MD_MAJOR
+ || mddev->sb->level != 1) {
+ heads=2;
+ sectors=4;
+ } else {
+ struct md_list_head *tmp;
+ mdk_rdev_t *rdev, *found_rdev = NULL;
+ int rd = -1;
+
+ ITERATE_RDEV(mddev,rdev,tmp) {
+ mdp_disk_t *d;
+ d = &mddev->sb->disks[rdev->desc_nr];
+ if (disk_active(d) &&
+ (rd == -1 || d->raid_disk < rd)) {
+ rd = d->raid_disk;
+ found_rdev = rdev;
+ }
+ }
+ if (!found_rdev ||
+ get_geo(found_rdev->dev, &heads, §ors)!=0) {
+ heads = 2;
+ sectors = 4;
+ }
+ }
+ err = md_put_user ((char)heads, (char *) &loc->heads);
if (err)
goto abort_unlock;
- err = md_put_user (4, (char *) &loc->sectors);
+ err = md_put_user ((char)sectors, (char *) &loc->sectors);
if (err)
goto abort_unlock;
- err = md_put_user (md_hd_struct[mdidx(mddev)].nr_sects/8,
+ err = md_put_user (hdp->nr_sects/((int)heads*(int)sectors),
(short *) &loc->cylinders);
if (err)
goto abort_unlock;
- err = md_put_user (md_hd_struct[minor].start_sect,
+ err = md_put_user (hdp->start_sect,
(long *) &loc->start);
goto done_unlock;
+ }
+
+ case BLKPG:
+ if (MAJOR(dev) == MD_MAJOR) {
+ printk(KERN_WARNING "%s(pid %d) tried BLKPG ioctl on
+MD device. Should use MDP device.\n",
+ current->comm, current->pid);
+ err = -EINVAL;
+ }
+ else
+ err = blk_ioctl(dev, cmd, arg);
+ goto done_unlock;
+
+ case BLKRRPART: /* Re-read partition tables */
+ if (!capable(CAP_SYS_ADMIN)) return -EACCES;
+ if (MAJOR(dev) == MD_MAJOR) {
+ printk(KERN_WARNING "%s(pid %d) tried BLKRRPART ioctl
+on MD device. Should use MDP device.\n",
+ current->comm, current->pid);
+ err = -EINVAL;
+ }
+ else
+ err = md_reread_partition(mddev);
+ goto done_unlock;
}
/*
@@ -3083,7 +3267,7 @@
if (mddev->nb_dev) {
if (mddev->pers)
sz += sprintf(page + sz, "\n %d blocks",
- md_size[mdidx(mddev)]);
+ md_size[mdminor(mddev)]);
else
sz += sprintf(page + sz, "\n %d blocks", size);
}
@@ -3496,10 +3680,15 @@
md_hardsect_sizes[i] = 512;
md_maxreadahead[i] = MD_READAHEAD;
}
- blksize_size[MAJOR_NR] = md_blocksizes;
- blk_size[MAJOR_NR] = md_size;
- max_readahead[MAJOR_NR] = md_maxreadahead;
- hardsect_size[MAJOR_NR] = md_hardsect_sizes;
+ blksize_size[MD_MAJOR] = md_blocksizes;
+ blk_size[MD_MAJOR] = md_size;
+ max_readahead[MD_MAJOR] = md_maxreadahead;
+ hardsect_size[MD_MAJOR] = md_hardsect_sizes;
+
+ blksize_size[mdp_major] = mdp_blocksizes;
+ blk_size[mdp_major] = mdp_size;
+ max_readahead[mdp_major] = mdp_maxreadahead;
+ hardsect_size[mdp_major] = mdp_hardsect_sizes;
dprintk("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
@@ -3517,11 +3706,23 @@
MD_MAJOR_VERSION, MD_MINOR_VERSION,
MD_PATCHLEVEL_VERSION, MAX_MD_DEVS, MD_SB_DISKS);
- if (devfs_register_blkdev (MAJOR_NR, "md", &md_fops))
+ if (devfs_register_blkdev (MD_MAJOR, "md", &md_fops))
{
- printk (KERN_ALERT "md: Unable to get major %d for md\n", MAJOR_NR);
+ printk (KERN_ALERT "md: Unable to get major %d for md\n", MD_MAJOR);
return (-1);
}
+#ifdef MDP_MAJOR
+ mdp_major = MDP_MAJOR;
+ if (devfs_register_blkdev (mdp_major, "mdp", &md_fops))
+#else
+ mdp_major = register_blkdev(0, "mdp", &md_fops);
+ if (mdp_major <= 0)
+#endif
+ {
+ printk(KERN_ALERT "md: Unable to get major %d for md\n", mdp_major);
+ return -1;
+ }
+ mdp_gendisk.major = mdp_major;
devfs_handle = devfs_mk_dir (NULL, "md", NULL);
/* we don't use devfs_register_series because we want to fill md_hd_struct */
for (minor=0; minor < MAX_MD_DEVS; ++minor) {
@@ -3533,13 +3734,16 @@
}
/* forward all md request to md_make_request */
- blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), md_make_request);
+ blk_queue_make_request(BLK_DEFAULT_QUEUE(MD_MAJOR), md_make_request);
+ blk_queue_make_request(BLK_DEFAULT_QUEUE(mdp_major), md_make_request);
- read_ahead[MAJOR_NR] = INT_MAX;
+ read_ahead[MD_MAJOR] = INT_MAX;
+ read_ahead[mdp_major] = INT_MAX;
md_gendisk.next = gendisk_head;
-
gendisk_head = &md_gendisk;
+ mdp_gendisk.next = gendisk_head;
+ gendisk_head = &mdp_gendisk;
md_recovery_thread = md_register_thread(md_do_recovery, NULL, name);
if (!md_recovery_thread)
@@ -3706,17 +3910,17 @@
extern kdev_t name_to_kdev_t(char *line) md__init;
void md__init md_setup_drive(void)
{
- int minor, i;
+ int unit, i;
kdev_t dev;
mddev_t*mddev;
kdev_t devices[MD_SB_DISKS+1];
- for (minor = 0; minor < MAX_MD_DEVS; minor++) {
+ for (unit = 0; unit < MAX_MD_DEVS; unit++) {
int err = 0;
char *devname;
mdu_disk_info_t dinfo;
- if ((devname = md_setup_args.device_names[minor]) == 0) continue;
+ if ((devname = md_setup_args.device_names[unit]) == 0) continue;
for (i = 0; i < MD_SB_DISKS && devname != 0; i++) {
@@ -3740,34 +3944,34 @@
}
devices[i] = dev;
- md_setup_args.device_set[minor] = 1;
+ md_setup_args.device_set[unit] = 1;
devname = p;
}
devices[i] = 0;
- if (md_setup_args.device_set[minor] == 0)
+ if (md_setup_args.device_set[unit] == 0)
continue;
- if (mddev_map[minor].mddev) {
- printk("md: Ignoring md=%d, already autodetected. (Use
raid=noautodetect)\n", minor);
+ if (mddev_map[unit].mddev) {
+ printk("md: Ignoring md=%d, already autodetected. (Use
+raid=noautodetect)\n", unit);
continue;
}
- printk("md: Loading md%d: %s\n", minor,
md_setup_args.device_names[minor]);
+ printk("md: Loading md%d: %s\n", unit,
+md_setup_args.device_names[unit]);
- mddev = alloc_mddev(MKDEV(MD_MAJOR,minor));
+ mddev = alloc_mddev(unit);
if (mddev == NULL) {
- printk("md: kmalloc failed - cannot start array %d\n", minor);
+ printk("md: kmalloc failed - cannot start array %d\n", unit);
continue;
}
- if (md_setup_args.pers[minor]) {
+ if (md_setup_args.pers[unit]) {
/* non-persistent */
mdu_array_info_t ainfo;
- ainfo.level = pers_to_level(md_setup_args.pers[minor]);
+ ainfo.level = pers_to_level(md_setup_args.pers[unit]);
ainfo.size = 0;
ainfo.nr_disks =0;
ainfo.raid_disks =0;
- ainfo.md_minor =minor;
+ ainfo.md_minor =unit;
ainfo.not_persistent = 1;
ainfo.state = MD_SB_CLEAN;
@@ -3776,7 +3980,7 @@
ainfo.failed_disks = 0;
ainfo.spare_disks = 0;
ainfo.layout = 0;
- ainfo.chunk_size = md_setup_args.chunk[minor];
+ ainfo.chunk_size = md_setup_args.chunk[unit];
err = set_array_info(mddev, &ainfo);
for (i = 0; !err && (dev = devices[i]); i++) {
dinfo.number = i;
@@ -3803,7 +4007,7 @@
if (err) {
mddev->sb_dirty = 0;
do_md_stop(mddev, 0);
- printk("md: starting md%d failed\n", minor);
+ printk("md: starting md%d failed\n", unit);
}
}
}
@@ -3870,7 +4074,8 @@
md_unregister_thread(md_recovery_thread);
devfs_unregister(devfs_handle);
- devfs_unregister_blkdev(MAJOR_NR,"md");
+ devfs_unregister_blkdev(MD_MAJOR,"md");
+ devfs_unregister_blkdev(mdp_major,"mdp");
unregister_reboot_notifier(&md_notifier);
unregister_sysctl_table(raid_table_header);
#ifdef CONFIG_PROC_FS
@@ -3885,11 +4090,25 @@
}
gendisk_ptr = & (*gendisk_ptr)->next;
}
- blk_dev[MAJOR_NR].queue = NULL;
- blksize_size[MAJOR_NR] = NULL;
- blk_size[MAJOR_NR] = NULL;
- max_readahead[MAJOR_NR] = NULL;
- hardsect_size[MAJOR_NR] = NULL;
+ gendisk_ptr = &gendisk_head;
+ while (*gendisk_ptr) {
+ if (*gendisk_ptr == &mdp_gendisk) {
+ *gendisk_ptr = mdp_gendisk.next;
+ break;
+ }
+ gendisk_ptr = & (*gendisk_ptr)->next;
+ }
+ blk_dev[MD_MAJOR].queue = NULL;
+ blksize_size[MD_MAJOR] = NULL;
+ blk_size[MD_MAJOR] = NULL;
+ max_readahead[MD_MAJOR] = NULL;
+ hardsect_size[MD_MAJOR] = NULL;
+
+ blk_dev[mdp_major].queue = NULL;
+ blksize_size[mdp_major] = NULL;
+ blk_size[mdp_major] = NULL;
+ max_readahead[mdp_major] = NULL;
+ hardsect_size[mdp_major] = NULL;
free_device_names();
--- ./drivers/md/linear.c 2001/07/01 22:59:38 1.1
+++ ./drivers/md/linear.c 2001/07/01 22:59:47 1.2
@@ -66,8 +66,8 @@
}
nb_zone = conf->nr_zones =
- md_size[mdidx(mddev)] / conf->smallest->size +
- ((md_size[mdidx(mddev)] % conf->smallest->size) ? 1 : 0);
+ md_size[mdminor(mddev)] / conf->smallest->size +
+ ((md_size[mdminor(mddev)] % conf->smallest->size) ? 1 : 0);
conf->hash_table = kmalloc (sizeof (struct linear_hash) * nb_zone,
GFP_KERNEL);
--- ./drivers/md/raid0.c 2001/07/01 22:59:38 1.1
+++ ./drivers/md/raid0.c 2001/07/01 22:59:47 1.2
@@ -138,10 +138,10 @@
if (create_strip_zones (mddev))
goto out_free_conf;
- printk("raid0 : md_size is %d blocks.\n", md_size[mdidx(mddev)]);
+ printk("raid0 : md_size is %d blocks.\n", md_size[mdminor(mddev)]);
printk("raid0 : conf->smallest->size is %ld blocks.\n", conf->smallest->size);
- nb_zone = md_size[mdidx(mddev)]/conf->smallest->size +
- (md_size[mdidx(mddev)] % conf->smallest->size ? 1 : 0);
+ nb_zone = md_size[mdminor(mddev)]/conf->smallest->size +
+ (md_size[mdminor(mddev)] % conf->smallest->size ? 1 : 0);
printk("raid0 : nb_zone is %ld.\n", nb_zone);
conf->nr_zones = nb_zone;
-
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to [EMAIL PROTECTED]