Linus, I wonder if you would consider applying, or commenting on this patch. It adds support for partitioning md devices. In particular, a new major device is created (name=="mdp", number assigned dynamically) which provides for 15 partitions on each of the first 16 md devices. I understand that a more uniform approach to partitioning might get introduced in 2.5, but this seems the best approach for 2.4. This is particularly useful if you want to have a mirrored boot drive, rather than two drives with lots of mirrored partitions. It is also useful for supporting what I call "winware raid", which is the raid-controller equivalent of "winmodems" - minimal hardware and most of the support done in software. Among the things that this patch does are: 1/ tidy up some terminology. Currently there is a one-to-one mapping between minor numbers and raid arrays or "units", so the term "minor" is used when referring to either the real minor number or to a unit. This patch introduces the term "unit" to be used to identify which particular array is being referred to, and keeps "minor" just for when a minor device number is really implied. 2/ When reporting the geometry of a partitioned raid1 array, the geometry of the underlying device is reported. For all other arrays the 2x4xLARGE geometry is maintained. 3/ The hardsectsize of partitions in a RAID5 array is set to the PAGESIZE because raid5 doesn't cope well with receiving requests with different blocksizes. 4/ The new device reports a name of "md" (via hd_struct->major_name) so partitions look like mda3 or md/disc0/part3, but registers the name "mdp" so that /proc/devices shows the major number next to "mdp". 5/ device ioctls are provided for re-reading the partition table and setting partition table information. 
--- ./include/linux/raid/md.h 2001/07/01 22:59:38 1.1 +++ ./include/linux/raid/md.h 2001/07/01 22:59:47 1.2 @@ -61,8 +61,11 @@ extern int md_size[MAX_MD_DEVS]; extern struct hd_struct md_hd_struct[MAX_MD_DEVS]; -extern void add_mddev_mapping (mddev_t *mddev, kdev_t dev, void *data); -extern void del_mddev_mapping (mddev_t *mddev, kdev_t dev); +extern int mdp_size[MAX_MDP_DEVS<<MDP_MINOR_SHIFT]; +extern struct hd_struct mdp_hd_struct[MAX_MDP_DEVS<<MDP_MINOR_SHIFT]; + +extern void add_mddev_mapping (mddev_t *mddev, int unit, void *data); +extern void del_mddev_mapping (mddev_t *mddev, int unit); extern char * partition_name (kdev_t dev); extern int register_md_personality (int p_num, mdk_personality_t *p); extern int unregister_md_personality (int p_num); --- ./include/linux/raid/md_k.h 2001/07/01 22:59:38 1.1 +++ ./include/linux/raid/md_k.h 2001/07/01 22:59:47 1.2 @@ -15,6 +15,7 @@ #ifndef _MD_K_H #define _MD_K_H + #define MD_RESERVED 0UL #define LINEAR 1UL #define STRIPED 2UL @@ -60,7 +61,10 @@ #error MD doesnt handle bigger kdev yet #endif +#define MDP_MINOR_SHIFT 4 + #define MAX_MD_DEVS (1<<MINORBITS) /* Max number of md dev */ +#define MAX_MDP_DEVS (1<<(MINORBITS-MDP_MINOR_SHIFT)) /* Max number of md dev */ /* * Maps a kdev to an mddev/subdev. 
How 'data' is handled is up to @@ -73,11 +77,17 @@ extern dev_mapping_t mddev_map [MAX_MD_DEVS]; +extern int mdp_major; static inline mddev_t * kdev_to_mddev (kdev_t dev) { - if (MAJOR(dev) != MD_MAJOR) + int unit=0; + if (MAJOR(dev) == MD_MAJOR) + unit = MINOR(dev); + else if (MAJOR(dev) == mdp_major) + unit = MINOR(dev) >> MDP_MINOR_SHIFT; + else BUG(); - return mddev_map[MINOR(dev)].mddev; + return mddev_map[unit].mddev; } /* @@ -191,7 +201,7 @@ { void *private; mdk_personality_t *pers; - int __minor; + int __unit; mdp_super_t *sb; int nb_dev; struct md_list_head disks; @@ -248,13 +258,34 @@ */ static inline int mdidx (mddev_t * mddev) { - return mddev->__minor; + return mddev->__unit; +} + +static inline int mdminor (mddev_t *mddev) +{ + return mdidx(mddev); +} + +static inline int mdpminor (mddev_t *mddev) +{ + return mdidx(mddev)<< MDP_MINOR_SHIFT; +} + +static inline kdev_t md_kdev (mddev_t *mddev) +{ + return MKDEV(MD_MAJOR, mdminor(mddev)); } -static inline kdev_t mddev_to_kdev(mddev_t * mddev) +static inline kdev_t mdp_kdev (mddev_t *mddev, int part) { - return MKDEV(MD_MAJOR, mdidx(mddev)); + return MKDEV(mdp_major, mdpminor(mddev)+part); } + +#define foreach_part(tmp,mddev) \ + if (mdidx(mddev)<MAX_MDP_DEVS) \ + for(tmp=mdpminor(mddev); \ + tmp<mdpminor(mddev)+(1<<MDP_MINOR_SHIFT); \ + tmp++) extern mdk_rdev_t * find_rdev(mddev_t * mddev, kdev_t dev); extern mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr); --- ./drivers/md/md.c 2001/07/01 22:58:53 1.3 +++ ./drivers/md/md.c 2001/07/01 22:59:47 1.4 @@ -110,9 +110,17 @@ static int md_blocksizes[MAX_MD_DEVS]; static int md_hardsect_sizes[MAX_MD_DEVS]; static int md_maxreadahead[MAX_MD_DEVS]; +int md_size[MAX_MD_DEVS]; + +int mdp_major; +struct hd_struct mdp_hd_struct[MAX_MDP_DEVS<<MDP_MINOR_SHIFT]; +static int mdp_blocksizes[MAX_MDP_DEVS<<MDP_MINOR_SHIFT]; +static int mdp_hardsect_sizes[MAX_MDP_DEVS<<MDP_MINOR_SHIFT]; +static int mdp_maxreadahead[MAX_MDP_DEVS<<MDP_MINOR_SHIFT]; +int 
mdp_size[MAX_MDP_DEVS<<MDP_MINOR_SHIFT]; + static mdk_thread_t *md_recovery_thread; -int md_size[MAX_MD_DEVS]; extern struct block_device_operations md_fops; static devfs_handle_t devfs_handle; @@ -130,6 +138,18 @@ next: NULL, fops: &md_fops, }; +static struct gendisk mdp_gendisk= +{ + major_name: "md", + minor_shift: MDP_MINOR_SHIFT, + max_p: 1<<MDP_MINOR_SHIFT, + part: mdp_hd_struct, + sizes: mdp_size, + nr_real: MAX_MDP_DEVS, + real_devices: NULL, + next: NULL, + fops: &md_fops, +}; /* * Enables to iterate over all existing md arrays @@ -143,65 +163,53 @@ */ dev_mapping_t mddev_map[MAX_MD_DEVS]; -void add_mddev_mapping (mddev_t * mddev, kdev_t dev, void *data) +void add_mddev_mapping (mddev_t * mddev, int unit, void *data) { - unsigned int minor = MINOR(dev); - - if (MAJOR(dev) != MD_MAJOR) { + if (mddev_map[unit].mddev != NULL) { MD_BUG(); return; } - if (mddev_map[minor].mddev != NULL) { - MD_BUG(); - return; - } - mddev_map[minor].mddev = mddev; - mddev_map[minor].data = data; + mddev_map[unit].mddev = mddev; + mddev_map[unit].data = data; } -void del_mddev_mapping (mddev_t * mddev, kdev_t dev) +void del_mddev_mapping (mddev_t * mddev, int unit) { - unsigned int minor = MINOR(dev); - - if (MAJOR(dev) != MD_MAJOR) { - MD_BUG(); - return; - } - if (mddev_map[minor].mddev != mddev) { + if (mddev_map[unit].mddev != mddev) { MD_BUG(); return; } - mddev_map[minor].mddev = NULL; - mddev_map[minor].data = NULL; + mddev_map[unit].mddev = NULL; + mddev_map[unit].data = NULL; } static int md_make_request (request_queue_t *q, int rw, struct buffer_head * bh) { + unsigned int minor = MINOR(bh->b_rdev); mddev_t *mddev = kdev_to_mddev(bh->b_rdev); - if (mddev && mddev->pers) + if (mddev && mddev->pers) { + if (MAJOR(bh->b_rdev) == mdp_major) + /* map the sector for partitions */ + bh->b_rsector += mdp_hd_struct[minor].start_sect; return mddev->pers->make_request(mddev, rw, bh); - else { + } else { buffer_IO_error(bh); return 0; } } -static mddev_t * alloc_mddev (kdev_t dev) 
+static mddev_t * alloc_mddev (int unit) { mddev_t *mddev; - if (MAJOR(dev) != MD_MAJOR) { - MD_BUG(); - return 0; - } mddev = (mddev_t *) kmalloc(sizeof(*mddev), GFP_KERNEL); if (!mddev) return NULL; memset(mddev, 0, sizeof(*mddev)); - mddev->__minor = MINOR(dev); + mddev->__unit = unit; init_MUTEX(&mddev->reconfig_sem); init_MUTEX(&mddev->recovery_sem); init_MUTEX(&mddev->resync_sem); @@ -214,7 +222,7 @@ * personalities can create additional mddevs * if necessary. */ - add_mddev_mapping(mddev, dev, 0); + add_mddev_mapping(mddev, unit, 0); md_list_add(&mddev->all_mddevs, &all_mddevs); MOD_INC_USE_COUNT; @@ -340,7 +348,7 @@ ITERATE_RDEV(mddev,rdev,tmp) { rdev->size &= mask; - md_size[mdidx(mddev)] += rdev->size; + md_size[mdminor(mddev)] += rdev->size; } return 0; } @@ -739,14 +747,19 @@ static void free_mddev (mddev_t *mddev) { + int part; if (!mddev) { MD_BUG(); return; } export_array(mddev); - md_size[mdidx(mddev)] = 0; - md_hd_struct[mdidx(mddev)].nr_sects = 0; + md_size[mdminor(mddev)] = 0; + md_hd_struct[mdminor(mddev)].nr_sects = 0; + foreach_part(part, mddev) { + mdp_size[part] = 0; + mdp_hd_struct[part].nr_sects = 0; + } /* * Make sure nobody else is using this mddev @@ -757,7 +770,7 @@ while (md_atomic_read(&mddev->recovery_sem.count) != 1) schedule(); - del_mddev_mapping(mddev, MKDEV(MD_MAJOR, mdidx(mddev))); + del_mddev_mapping(mddev, mdidx(mddev)); md_list_del(&mddev->all_mddevs); MD_INIT_LIST_HEAD(&mddev->all_mddevs); kfree(mddev); @@ -1521,9 +1534,10 @@ printk (UNKNOWN_LEVEL, mdidx(mddev), sb->level); goto abort; } - if (!md_size[mdidx(mddev)]) - md_size[mdidx(mddev)] = sb->size * data_disks; - + if (!md_size[mdminor(mddev)]) + md_size[mdminor(mddev)] = sb->size * data_disks; + mdp_size[mdpminor(mddev)] = md_size[mdminor(mddev)]; + readahead = MD_READAHEAD; if ((sb->level == 0) || (sb->level == 4) || (sb->level == 5)) { readahead = (mddev->sb->chunk_size>>PAGE_SHIFT) * 4 * data_disks; @@ -1533,7 +1547,8 @@ if (sb->level == -3) readahead = 0; } - 
md_maxreadahead[mdidx(mddev)] = readahead; + md_maxreadahead[mdminor(mddev)] = readahead; + mdp_maxreadahead[mdpminor(mddev)] = readahead; printk(KERN_INFO "md%d: max total readahead window set to %ldk\n", mdidx(mddev), readahead*(PAGE_SIZE/1024)); @@ -1562,6 +1577,8 @@ int chunk_size; struct md_list_head *tmp; mdk_rdev_t *rdev; + int part,minor,pminor; + int blksize, sectsize; if (!mddev->nb_dev) { @@ -1576,7 +1593,9 @@ * Resize disks to align partitions size on a given * chunk size. */ - md_size[mdidx(mddev)] = 0; + minor = mdminor(mddev); + pminor = mdpminor(mddev); + md_size[minor] = 0; /* * Analyze all RAID superblock(s) @@ -1644,21 +1663,42 @@ * device. * Also find largest hardsector size */ - md_hardsect_sizes[mdidx(mddev)] = 512; + sectsize = 512; ITERATE_RDEV(mddev,rdev,tmp) { if (rdev->faulty) continue; invalidate_device(rdev->dev, 1); if (get_hardsect_size(rdev->dev) - > md_hardsect_sizes[mdidx(mddev)]) - md_hardsect_sizes[mdidx(mddev)] = + > sectsize) + sectsize = get_hardsect_size(rdev->dev); } - md_blocksizes[mdidx(mddev)] = 1024; - if (md_blocksizes[mdidx(mddev)] < md_hardsect_sizes[mdidx(mddev)]) - md_blocksizes[mdidx(mddev)] = md_hardsect_sizes[mdidx(mddev)]; + + blksize = 1024; + if (blksize < sectsize) + blksize = sectsize; mddev->pers = pers[pnum]; + md_hardsect_sizes[minor] = sectsize; + md_blocksizes[minor] = blksize; + + foreach_part(part, mddev) { + mdp_hardsect_sizes[part] = sectsize; + mdp_blocksizes[part] = blksize; + + /* When partitioned, raid5 really must use page size blocks + * or the stripe cache gets confused + * However, setting hardsect_size for the whole disc + * device confuses partitioning tools. + * So we just change it for remaining partitions + */ + if (pnum == RAID5) { + sectsize = PAGE_SIZE; + blksize = PAGE_SIZE; + } + } + + err = mddev->pers->run(mddev); if (err) { printk("md: pers->run() failed ...\n"); @@ -1673,12 +1713,18 @@ * md_size has units of 1K blocks, which are * twice as large as sectors. 
*/ - md_hd_struct[mdidx(mddev)].start_sect = 0; - register_disk(&md_gendisk, MKDEV(MAJOR_NR,mdidx(mddev)), - 1, &md_fops, md_size[mdidx(mddev)]<<1); - + md_hd_struct[minor].start_sect = 0; read_ahead[MD_MAJOR] = 1024; - return (0); + read_ahead[mdp_major] = 1024; + + register_disk(&md_gendisk, md_kdev(mddev), + 1, &md_fops, md_size[minor]<<1); + if (mdidx(mddev) < MAX_MDP_DEVS) + register_disk(&mdp_gendisk, mdp_kdev(mddev,0), + 1 << MDP_MINOR_SHIFT, &md_fops, + mdp_size[minor] << 1); + + return 0; } #undef TOO_BIG_CHUNKSIZE @@ -1689,6 +1735,7 @@ static int restart_array (mddev_t *mddev) { int err = 0; + int part; /* * Complain if it has no devices @@ -1701,7 +1748,9 @@ OUT(-EBUSY); mddev->ro = 0; - set_device_ro(mddev_to_kdev(mddev), 0); + set_device_ro(md_kdev(mddev), 0); + foreach_part(part, mddev) + set_device_ro(MKDEV(mdp_major,part), 0); printk (KERN_INFO "md: md%d switched to read-write mode.\n", mdidx(mddev)); @@ -1726,7 +1775,7 @@ static int do_md_stop (mddev_t * mddev, int ro) { int err = 0, resync_interrupted = 0; - kdev_t dev = mddev_to_kdev(mddev); + int part; if (atomic_read(&mddev->active)>1) { printk(STILL_IN_USE, mdidx(mddev)); @@ -1755,22 +1804,37 @@ down(&mddev->recovery_sem); up(&mddev->recovery_sem); - invalidate_device(dev, 1); + invalidate_device(md_kdev(mddev), 1); + foreach_part(part, mddev) { + kdev_t pdev = MKDEV(mdp_major, part); + invalidate_device(pdev, 1); + } if (ro) { if (mddev->ro) OUT(-ENXIO); mddev->ro = 1; } else { - if (mddev->ro) - set_device_ro(dev, 0); + if (mddev->ro) { + set_device_ro(md_kdev(mddev), 0); + foreach_part(part, mddev) + set_device_ro(MKDEV(mdp_major, part), 0); + } if (mddev->pers->stop(mddev)) { - if (mddev->ro) - set_device_ro(dev, 1); + if (mddev->ro) { + set_device_ro(md_kdev(mddev), 1); + foreach_part(part, mddev) + set_device_ro(MKDEV(mdp_major, part), +1); + } OUT(-EBUSY); } if (mddev->ro) mddev->ro = 0; + /* unregister from devfs */ + if (mdidx(mddev) < MAX_MDP_DEVS) + 
devfs_register_partitions(&mdp_gendisk, + mdpminor(mddev), + 1); } if (mddev->sb) { /* @@ -1783,8 +1847,11 @@ } md_update_sb(mddev); } - if (ro) - set_device_ro(dev, 1); + if (ro) { + set_device_ro(md_kdev(mddev),1); + foreach_part(part, mddev) + set_device_ro(MKDEV(mdp_major,part), 1); + } } /* @@ -1864,8 +1931,7 @@ struct md_list_head *tmp; mdk_rdev_t *rdev0, *rdev; mddev_t *mddev; - kdev_t md_kdev; - + int unit; printk("md: autorun ...\n"); while (pending_raid_disks.next != &pending_raid_disks) { @@ -1890,8 +1956,8 @@ * mostly sane superblocks. It's time to allocate the * mddev. */ - md_kdev = MKDEV(MD_MAJOR, rdev0->sb->md_minor); - mddev = kdev_to_mddev(md_kdev); + unit = rdev0->sb->md_minor; + mddev = mddev_map[unit].mddev; if (mddev) { printk("md: md%d already running, cannot run %s\n", mdidx(mddev), partition_name(rdev0->dev)); @@ -1899,12 +1965,12 @@ export_rdev(rdev); continue; } - mddev = alloc_mddev(md_kdev); + mddev = alloc_mddev(unit); if (mddev == NULL) { printk("md: cannot allocate memory for md drive.\n"); break; } - if (md_kdev == countdev) + if (unit == countdev) atomic_inc(&mddev->active); printk("md: created md%d\n", mdidx(mddev)); ITERATE_RDEV_GENERIC(candidates,pending,rdev,tmp) { @@ -2389,6 +2455,11 @@ SET_SB(nr_disks); SET_SB(raid_disks); SET_SB(md_minor); + /* I want to trust the unit number of mddev more than + * md_minorfrom info - neilb + */ + mddev->sb->md_minor = mdidx(mddev); + SET_SB(not_persistent); SET_SB(state); @@ -2448,11 +2519,59 @@ { int ret; - fsync_dev(mddev_to_kdev(mddev)); ret = md_error(mddev, dev); return ret; } +static int md_reread_partition(mddev_t *mddev) +{ + /* copied from ide_revalidate_disk */ + int minor = mdpminor(mddev); + int part; + if (mdidx(mddev) >= MAX_MDP_DEVS) + return 0; + foreach_part(part, mddev) { + if (mdp_hd_struct[part].nr_sects > 0) { + kdev_t dev = MKDEV(mdp_major, part); + + invalidate_device(dev, 1); + + if (mdp_hardsect_sizes[part] < 1024) + set_blocksize(dev, 1024); + else + 
set_blocksize(dev, mdp_hardsect_sizes[part]); + } + mdp_hd_struct[part].start_sect= 0; + mdp_hd_struct[part].nr_sects = 0; + } + /* and from revalidate_scsidisk.. */ + grok_partitions(&mdp_gendisk, minor >> MDP_MINOR_SHIFT, 1<<MDP_MINOR_SHIFT, + mdp_size[minor]<<1); + return 0; +} + +static int get_geo(kdev_t dev, int *heads, int *sectors) +{ + struct hd_geometry geom; + struct block_device *bdev; + int rv; + + bdev = bdget(dev); + if (!bdev) + return -1; + rv = blkdev_get(bdev, FMODE_READ, 0, BDEV_FILE); + if (rv == 0) { + rv = ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)&geom); + blkdev_put(bdev, BDEV_FILE); + } + if (rv == 0) { + *heads = geom.heads; + *sectors = geom.sectors; + } + bdput(bdev); + return rv; +} + static int md_ioctl (struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { @@ -2461,15 +2580,26 @@ struct hd_geometry *loc = (struct hd_geometry *) arg; mddev_t *mddev = NULL; kdev_t dev; + int unit=0; + struct hd_struct *hdp = NULL; if (!md_capable_admin()) return -EACCES; dev = inode->i_rdev; minor = MINOR(dev); - if (minor >= MAX_MD_DEVS) + if (( MAJOR(dev) == MD_MAJOR && minor >= MAX_MD_DEVS) || + ( MAJOR(dev) == mdp_major && minor >= (MAX_MDP_DEVS<<MDP_MINOR_SHIFT))) return -EINVAL; + if (MAJOR(dev) == MD_MAJOR) { + unit = MINOR(dev); + hdp = &md_hd_struct[minor]; + } else if (MAJOR(dev) == mdp_major) { + unit = MINOR(dev) >> MDP_MINOR_SHIFT; + hdp = &mdp_hd_struct[minor]; + } else + BUG(); /* * Commands dealing with the RAID driver but not any * particular array: @@ -2497,8 +2627,9 @@ err = -EINVAL; goto abort; } - err = md_put_user(md_hd_struct[minor].nr_sects, - (long *) arg); + err = md_put_user(hdp->nr_sects, + (long *) arg); + goto done; case BLKFLSBUF: @@ -2546,7 +2677,7 @@ switch (cmd) { case SET_ARRAY_INFO: - mddev = alloc_mddev(dev); + mddev = alloc_mddev(unit); if (!mddev) { err = -ENOMEM; goto abort; @@ -2586,7 +2717,7 @@ /* * possibly make it lock the array ... 
*/ - err = autostart_array((kdev_t)arg, dev); + err = autostart_array((kdev_t)arg, unit); if (err) { printk("md: autostart %s failed!\n", partition_name((kdev_t)arg)); @@ -2642,30 +2773,83 @@ err = do_md_stop (mddev, 1); goto done_unlock; - /* - * We have a problem here : there is no easy way to give a CHS - * virtual geometry. We currently pretend that we have a 2 heads - * 4 sectors (with a BIG number of cylinders...). This drives - * dosfs just mad... ;-) - */ + /* + * The geometry of a raid array is not necessarily well + * defined. There aren't always heads, sectors, and cylinders. + * The geometry is only really useful for fdisk and similar + * tools to align paritions. + * A a raid1 array, presenting the geometry of the underlying + * devices can be useful when partitioning. For + * all other arrays, including raid1 when not partitioning, we +just + * give 2 heads, 4 sectors, and lots of cylinders. + * + */ case HDIO_GETGEO: + { + int sectors, heads; if (!loc) { err = -EINVAL; goto abort_unlock; } - err = md_put_user (2, (char *) &loc->heads); + if (MAJOR(dev) == MD_MAJOR + || mddev->sb->level != 1) { + heads=2; + sectors=4; + } else { + struct md_list_head *tmp; + mdk_rdev_t *rdev, *found_rdev = NULL; + int rd = -1; + + ITERATE_RDEV(mddev,rdev,tmp) { + mdp_disk_t *d; + d = &mddev->sb->disks[rdev->desc_nr]; + if (disk_active(d) && + (rd == -1 || d->raid_disk < rd)) { + rd = d->raid_disk; + found_rdev = rdev; + } + } + if (!found_rdev || + get_geo(found_rdev->dev, &heads, §ors)!=0) { + heads = 2; + sectors = 4; + } + } + err = md_put_user ((char)heads, (char *) &loc->heads); if (err) goto abort_unlock; - err = md_put_user (4, (char *) &loc->sectors); + err = md_put_user ((char)sectors, (char *) &loc->sectors); if (err) goto abort_unlock; - err = md_put_user (md_hd_struct[mdidx(mddev)].nr_sects/8, + err = md_put_user (hdp->nr_sects/((int)heads*(int)sectors), (short *) &loc->cylinders); if (err) goto abort_unlock; - err = md_put_user 
(md_hd_struct[minor].start_sect, + err = md_put_user (hdp->start_sect, (long *) &loc->start); goto done_unlock; + } + + case BLKPG: + if (MAJOR(dev) == MD_MAJOR) { + printk(KERN_WARNING "%s(pid %d) tried BLKPG ioctl on +MD device. Should use MDP device.\n", + current->comm, current->pid); + err = -EINVAL; + } + else + err = blk_ioctl(dev, cmd, arg); + goto done_unlock; + + case BLKRRPART: /* Re-read partition tables */ + if (!capable(CAP_SYS_ADMIN)) return -EACCES; + if (MAJOR(dev) == MD_MAJOR) { + printk(KERN_WARNING "%s(pid %d) tried BLKRRPART ioctl +on MD device. Should use MDP device.\n", + current->comm, current->pid); + err = -EINVAL; + } + else + err = md_reread_partition(mddev); + goto done_unlock; } /* @@ -3083,7 +3267,7 @@ if (mddev->nb_dev) { if (mddev->pers) sz += sprintf(page + sz, "\n %d blocks", - md_size[mdidx(mddev)]); + md_size[mdminor(mddev)]); else sz += sprintf(page + sz, "\n %d blocks", size); } @@ -3496,10 +3680,15 @@ md_hardsect_sizes[i] = 512; md_maxreadahead[i] = MD_READAHEAD; } - blksize_size[MAJOR_NR] = md_blocksizes; - blk_size[MAJOR_NR] = md_size; - max_readahead[MAJOR_NR] = md_maxreadahead; - hardsect_size[MAJOR_NR] = md_hardsect_sizes; + blksize_size[MD_MAJOR] = md_blocksizes; + blk_size[MD_MAJOR] = md_size; + max_readahead[MD_MAJOR] = md_maxreadahead; + hardsect_size[MD_MAJOR] = md_hardsect_sizes; + + blksize_size[mdp_major] = mdp_blocksizes; + blk_size[mdp_major] = mdp_size; + max_readahead[mdp_major] = mdp_maxreadahead; + hardsect_size[mdp_major] = mdp_hardsect_sizes; dprintk("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t)); @@ -3517,11 +3706,23 @@ MD_MAJOR_VERSION, MD_MINOR_VERSION, MD_PATCHLEVEL_VERSION, MAX_MD_DEVS, MD_SB_DISKS); - if (devfs_register_blkdev (MAJOR_NR, "md", &md_fops)) + if (devfs_register_blkdev (MD_MAJOR, "md", &md_fops)) { - printk (KERN_ALERT "md: Unable to get major %d for md\n", MAJOR_NR); + printk (KERN_ALERT "md: Unable to get major %d for md\n", MD_MAJOR); return (-1); } +#ifdef MDP_MAJOR + 
mdp_major = MDP_MAJOR; + if (devfs_register_blkdev (mdp_major, "mdp", &md_fops)) +#else + mdp_major = register_blkdev(0, "mdp", &md_fops); + if (mdp_major <= 0) +#endif + { + printk(KERN_ALERT "md: Unable to get major %d for md\n", mdp_major); + return -1; + } + mdp_gendisk.major = mdp_major; devfs_handle = devfs_mk_dir (NULL, "md", NULL); /* we don't use devfs_register_series because we want to fill md_hd_struct */ for (minor=0; minor < MAX_MD_DEVS; ++minor) { @@ -3533,13 +3734,16 @@ } /* forward all md request to md_make_request */ - blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), md_make_request); + blk_queue_make_request(BLK_DEFAULT_QUEUE(MD_MAJOR), md_make_request); + blk_queue_make_request(BLK_DEFAULT_QUEUE(mdp_major), md_make_request); - read_ahead[MAJOR_NR] = INT_MAX; + read_ahead[MD_MAJOR] = INT_MAX; + read_ahead[mdp_major] = INT_MAX; md_gendisk.next = gendisk_head; - gendisk_head = &md_gendisk; + mdp_gendisk.next = gendisk_head; + gendisk_head = &mdp_gendisk; md_recovery_thread = md_register_thread(md_do_recovery, NULL, name); if (!md_recovery_thread) @@ -3706,17 +3910,17 @@ extern kdev_t name_to_kdev_t(char *line) md__init; void md__init md_setup_drive(void) { - int minor, i; + int unit, i; kdev_t dev; mddev_t*mddev; kdev_t devices[MD_SB_DISKS+1]; - for (minor = 0; minor < MAX_MD_DEVS; minor++) { + for (unit = 0; unit < MAX_MD_DEVS; unit++) { int err = 0; char *devname; mdu_disk_info_t dinfo; - if ((devname = md_setup_args.device_names[minor]) == 0) continue; + if ((devname = md_setup_args.device_names[unit]) == 0) continue; for (i = 0; i < MD_SB_DISKS && devname != 0; i++) { @@ -3740,34 +3944,34 @@ } devices[i] = dev; - md_setup_args.device_set[minor] = 1; + md_setup_args.device_set[unit] = 1; devname = p; } devices[i] = 0; - if (md_setup_args.device_set[minor] == 0) + if (md_setup_args.device_set[unit] == 0) continue; - if (mddev_map[minor].mddev) { - printk("md: Ignoring md=%d, already autodetected. 
(Use raid=noautodetect)\n", minor); + if (mddev_map[unit].mddev) { + printk("md: Ignoring md=%d, already autodetected. (Use +raid=noautodetect)\n", unit); continue; } - printk("md: Loading md%d: %s\n", minor, md_setup_args.device_names[minor]); + printk("md: Loading md%d: %s\n", unit, +md_setup_args.device_names[unit]); - mddev = alloc_mddev(MKDEV(MD_MAJOR,minor)); + mddev = alloc_mddev(unit); if (mddev == NULL) { - printk("md: kmalloc failed - cannot start array %d\n", minor); + printk("md: kmalloc failed - cannot start array %d\n", unit); continue; } - if (md_setup_args.pers[minor]) { + if (md_setup_args.pers[unit]) { /* non-persistent */ mdu_array_info_t ainfo; - ainfo.level = pers_to_level(md_setup_args.pers[minor]); + ainfo.level = pers_to_level(md_setup_args.pers[unit]); ainfo.size = 0; ainfo.nr_disks =0; ainfo.raid_disks =0; - ainfo.md_minor =minor; + ainfo.md_minor =unit; ainfo.not_persistent = 1; ainfo.state = MD_SB_CLEAN; @@ -3776,7 +3980,7 @@ ainfo.failed_disks = 0; ainfo.spare_disks = 0; ainfo.layout = 0; - ainfo.chunk_size = md_setup_args.chunk[minor]; + ainfo.chunk_size = md_setup_args.chunk[unit]; err = set_array_info(mddev, &ainfo); for (i = 0; !err && (dev = devices[i]); i++) { dinfo.number = i; @@ -3803,7 +4007,7 @@ if (err) { mddev->sb_dirty = 0; do_md_stop(mddev, 0); - printk("md: starting md%d failed\n", minor); + printk("md: starting md%d failed\n", unit); } } } @@ -3870,7 +4074,8 @@ md_unregister_thread(md_recovery_thread); devfs_unregister(devfs_handle); - devfs_unregister_blkdev(MAJOR_NR,"md"); + devfs_unregister_blkdev(MD_MAJOR,"md"); + devfs_unregister_blkdev(mdp_major,"mdp"); unregister_reboot_notifier(&md_notifier); unregister_sysctl_table(raid_table_header); #ifdef CONFIG_PROC_FS @@ -3885,11 +4090,25 @@ } gendisk_ptr = & (*gendisk_ptr)->next; } - blk_dev[MAJOR_NR].queue = NULL; - blksize_size[MAJOR_NR] = NULL; - blk_size[MAJOR_NR] = NULL; - max_readahead[MAJOR_NR] = NULL; - hardsect_size[MAJOR_NR] = NULL; + gendisk_ptr = &gendisk_head; 
+ while (*gendisk_ptr) { + if (*gendisk_ptr == &mdp_gendisk) { + *gendisk_ptr = mdp_gendisk.next; + break; + } + gendisk_ptr = & (*gendisk_ptr)->next; + } + blk_dev[MD_MAJOR].queue = NULL; + blksize_size[MD_MAJOR] = NULL; + blk_size[MD_MAJOR] = NULL; + max_readahead[MD_MAJOR] = NULL; + hardsect_size[MD_MAJOR] = NULL; + + blk_dev[mdp_major].queue = NULL; + blksize_size[mdp_major] = NULL; + blk_size[mdp_major] = NULL; + max_readahead[mdp_major] = NULL; + hardsect_size[mdp_major] = NULL; free_device_names(); --- ./drivers/md/linear.c 2001/07/01 22:59:38 1.1 +++ ./drivers/md/linear.c 2001/07/01 22:59:47 1.2 @@ -66,8 +66,8 @@ } nb_zone = conf->nr_zones = - md_size[mdidx(mddev)] / conf->smallest->size + - ((md_size[mdidx(mddev)] % conf->smallest->size) ? 1 : 0); + md_size[mdminor(mddev)] / conf->smallest->size + + ((md_size[mdminor(mddev)] % conf->smallest->size) ? 1 : 0); conf->hash_table = kmalloc (sizeof (struct linear_hash) * nb_zone, GFP_KERNEL); --- ./drivers/md/raid0.c 2001/07/01 22:59:38 1.1 +++ ./drivers/md/raid0.c 2001/07/01 22:59:47 1.2 @@ -138,10 +138,10 @@ if (create_strip_zones (mddev)) goto out_free_conf; - printk("raid0 : md_size is %d blocks.\n", md_size[mdidx(mddev)]); + printk("raid0 : md_size is %d blocks.\n", md_size[mdminor(mddev)]); printk("raid0 : conf->smallest->size is %ld blocks.\n", conf->smallest->size); - nb_zone = md_size[mdidx(mddev)]/conf->smallest->size + - (md_size[mdidx(mddev)] % conf->smallest->size ? 1 : 0); + nb_zone = md_size[mdminor(mddev)]/conf->smallest->size + + (md_size[mdminor(mddev)] % conf->smallest->size ? 1 : 0); printk("raid0 : nb_zone is %ld.\n", nb_zone); conf->nr_zones = nb_zone; - To unsubscribe from this list: send the line "unsubscribe linux-raid" in the body of a message to [EMAIL PROTECTED]