Hi!
BEWARE: Don't use the following patch in production, it might eat your RAID
set for breakfast.
Attached is a patch which does 4 things:
- tries to solve cleanly the RAID superblock issues on non-x86 architectures
(where sizeof(md_super_t) was bigger than 4096, usually 4104) by introducing
RAID 0.91.x on-disk format which is binary compatible with the 0.90 for
i386 (and at the same time changes it from native-endian to little-endian).
- introduces a reserved-bytes setting in raidtab, for which the default is
auto-probed by mkraid if not specified. If non-zero, the RAID array will
make sure the first reserved_bytes on the disk are never touched (resynced or
whatever). This makes it possible e.g. to place a RAID partition at cylinder 0
on a disk with a Sun partition table.
- fixes raid1_kmalloc in raid1.c, which allocated the wrong size (it always
allocated sizeof(raid1_conf_t) instead of the requested size)
The patch is against 2.2.14 with the 2.2.14-B1 RAID patch, because 2.3.99-pre2
is missing the raid1/5 bits. I can try to port the changes to the remaining
files to 2.3.99-pre2 though.
- raidtab.5 man page fix
I'm looking for testers both on x86 and non-x86.
Cheers,
Jakub
___________________________________________________________________
Jakub Jelinek | [EMAIL PROTECTED] | http://sunsite.mff.cuni.cz/~jj
Linux version 2.3.99-pre2 on a sparc64 machine (1343.49 BogoMips)
___________________________________________________________________
--- linux/arch/sparc64/kernel/ioctl32.c.jj Mon Jan 24 11:36:41 2000
+++ linux/arch/sparc64/kernel/ioctl32.c Fri Mar 10 17:32:37 2000
@@ -2022,12 +2022,14 @@ asmlinkage int sys32_ioctl(unsigned int
/* 0x09 */
case /* RAID_VERSION */ _IOR (MD_MAJOR, 0x10, char[12]):
- case /* GET_ARRAY_INFO */ _IOR (MD_MAJOR, 0x11, char[72]):
+ case /* GET_ARRAY_INFO */ _IOR (MD_MAJOR, 0x11, char[128]):
+ case /* OLD_GET_ARRAY_INFO */ _IOR (MD_MAJOR, 0x11, char[72]):
case /* GET_DISK_INFO */ _IOR (MD_MAJOR, 0x12, char[20]):
case /* CLEAR_ARRAY */ _IO (MD_MAJOR, 0x20):
case /* ADD_NEW_DISK */ _IOW (MD_MAJOR, 0x21, char[20]):
case /* HOT_REMOVE_DISK */ _IO (MD_MAJOR, 0x22):
- case /* SET_ARRAY_INFO */ _IOW (MD_MAJOR, 0x23, char[72]):
+ case /* SET_ARRAY_INFO */ _IOW (MD_MAJOR, 0x23, char[128]):
+ case /* OLD_SET_ARRAY_INFO */ _IOW (MD_MAJOR, 0x23, char[72]):
case /* SET_DISK_INFO */ _IO (MD_MAJOR, 0x24):
case /* WRITE_RAID_INFO */ _IO (MD_MAJOR, 0x25):
case /* UNPROTECT_ARRAY */ _IO (MD_MAJOR, 0x26):
--- linux/drivers/block/md.c.jj Mon Jan 24 11:36:42 2000
+++ linux/drivers/block/md.c Thu Mar 16 14:29:46 2000
@@ -11,6 +11,8 @@
- kerneld support by Boris Tobotras <[EMAIL PROTECTED]>
- kmod support by: Cyrus Durgin
- RAID0 bugfixes: Mark Anthony Lisher <[EMAIL PROTECTED]>
+ - superblock layout on non-x86 fixes and reserved_bytes support by
+ Jakub Jelinek <[EMAIL PROTECTED]>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -299,9 +301,18 @@ static unsigned int calc_dev_sboffset (k
return size;
}
+static inline unsigned int calc_dev_reserved (mddev_t *mddev)
+{
+ unsigned int reserved = mddev->sb->reserved_bytes;
+
+ reserved += mddev->sb->chunk_size - 1;
+ reserved &= ~(mddev->sb->chunk_size - 1);
+ return reserved / 1024;
+}
+
static unsigned int calc_dev_size (kdev_t dev, mddev_t *mddev, int persistent)
{
- unsigned int size;
+ unsigned int size, reserved;
size = calc_dev_sboffset(dev, mddev, persistent);
if (!mddev->sb) {
@@ -310,9 +321,25 @@ static unsigned int calc_dev_size (kdev_
}
if (mddev->sb->chunk_size)
size &= ~(mddev->sb->chunk_size/1024 - 1);
+ reserved = calc_dev_reserved (mddev);
+ if (reserved > size)
+ size = 0;
+ else
+ size -= reserved;
return size;
}
+__u64 __inline__ md_read_events (mdp_super_t *sb)
+{
+ return (((__u64)sb->eventshi) << 32) | sb->eventslo;
+}
+
+void __inline__ md_write_events (__u64 events, mdp_super_t *sb)
+{
+ sb->eventshi = events >> 32;
+ sb->eventslo = events;
+}
+
/*
* We check wether all devices are numbered from 0 to nb_dev-1. The
* order is guaranteed even after device name changes.
@@ -376,28 +403,13 @@ abort:
return 1;
}
-static unsigned int zoned_raid_size (mddev_t *mddev)
+static inline unsigned int zoned_raid_size (mddev_t *mddev)
{
- unsigned int mask;
mdk_rdev_t * rdev;
struct md_list_head *tmp;
- if (!mddev->sb) {
- MD_BUG();
- return -EINVAL;
- }
- /*
- * do size and offset calculations.
- */
- mask = ~(mddev->sb->chunk_size/1024 - 1);
-printk("mask %08x\n", mask);
-
ITERATE_RDEV(mddev,rdev,tmp) {
-printk(" rdev->size: %d\n", rdev->size);
- rdev->size &= mask;
-printk(" masked rdev->size: %d\n", rdev->size);
md_size[mdidx(mddev)] += rdev->size;
-printk(" new md_size: %d\n", md_size[mdidx(mddev)]);
}
return 0;
}
@@ -492,6 +504,17 @@ static void mark_rdev_faulty (mdk_rdev_t
restore_flags(flags);
}
+static unsigned int calc_sb_csum (mdp_super_t * sb)
+{
+ unsigned int disk_csum, csum;
+
+ disk_csum = sb->sb_csum;
+ sb->sb_csum = 0;
+ csum = csum_partial((void *)sb, MD_SB_BYTES, 0);
+ sb->sb_csum = disk_csum;
+ return csum;
+}
+
static int read_disk_sb (mdk_rdev_t * rdev)
{
int ret = -EINVAL;
@@ -518,13 +541,33 @@ static int read_disk_sb (mdk_rdev_t * rd
bh = bread (dev, sb_offset / MD_SB_BLOCKS, MD_SB_BYTES);
if (bh) {
- sb = (mdp_super_t *) bh->b_data;
- memcpy (rdev->sb, sb, MD_SB_BYTES);
+ sb = rdev->sb;
+ memcpy (sb, (mdp_super_t *) bh->b_data, MD_SB_BYTES);
+ if (sb->md_magic != MD_SB_MAGIC &&
+ sb->md_magic == cpu_to_le32(MD_SB_MAGIC) &&
+ (le32_to_cpu(sb->major_version) > 0 ||
+ le32_to_cpu(sb->minor_version) > 90)) {
+ int i;
+ u32 *sbp = (u32 *) sb;
+
+ for (i = 0; i < MD_SB_WORDS; i++, sbp++)
+ le32_to_cpus(sbp);
+ }
+
+ rdev->csum_valid = calc_sb_csum(sb) == sb->sb_csum;
+
+ if (sb->major_version == 0 && sb->minor_version <= 90 &&
+ sizeof(mdp_old_super_t) >= sizeof(mdp_super_t)) {
+ /* Uh oh, 64bit events member moved half of the superblock */
+ md_write_events(get_unaligned(&((mdp_old_super_t
+*)sb)->events), sb);
+ memmove(sb->gstate_sreserved, ((mdp_old_super_t
+*)sb)->gstate_sreserved,
+ (long)sb + MD_SB_BYTES - (long)(((mdp_old_super_t
+*)sb)->gstate_sreserved));
+ }
} else {
printk (NO_SB,partition_name(rdev->dev));
goto abort;
}
- printk(" [events: %08lx]\n", (unsigned long)get_unaligned(&rdev->sb->events));
+ printk(" [events: %08lx]\n", (unsigned long)md_read_events(rdev->sb));
ret = 0;
abort:
if (bh)
@@ -532,17 +575,6 @@ abort:
return ret;
}
-static unsigned int calc_sb_csum (mdp_super_t * sb)
-{
- unsigned int disk_csum, csum;
-
- disk_csum = sb->sb_csum;
- sb->sb_csum = 0;
- csum = csum_partial((void *)sb, MD_SB_BYTES, 0);
- sb->sb_csum = disk_csum;
- return csum;
-}
-
/*
* Check one RAID superblock for generic plausibility
*/
@@ -569,7 +601,7 @@ static int check_disk_sb (mdk_rdev_t * r
goto abort;
}
- if (calc_sb_csum(sb) != sb->sb_csum)
+ if (!rdev->csum_valid)
printk(BAD_CSUM, partition_name(rdev->dev));
ret = 0;
abort:
@@ -767,7 +799,7 @@ static void print_sb(mdp_super_t *sb)
printk(" UT:%08x ST:%d AD:%d WD:%d FD:%d SD:%d CSUM:%08x E:%08lx\n",
sb->utime, sb->state, sb->active_disks, sb->working_disks,
sb->failed_disks, sb->spare_disks,
- sb->sb_csum, (unsigned long)get_unaligned(&sb->events));
+ sb->sb_csum, (unsigned long)md_read_events(sb));
for (i = 0; i < MD_SB_DISKS; i++) {
mdp_disk_t *desc;
@@ -827,16 +859,16 @@ static int sb_equal ( mdp_super_t *sb1,
int ret;
mdp_super_t *tmp1, *tmp2;
- tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
- tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
+ tmp1 = kmalloc(MD_SB_GENERIC_CONSTANT_WORDS * 4,GFP_KERNEL);
+ tmp2 = kmalloc(MD_SB_GENERIC_CONSTANT_WORDS * 4,GFP_KERNEL);
if (!tmp1 || !tmp2) {
ret = 0;
goto abort;
}
- *tmp1 = *sb1;
- *tmp2 = *sb2;
+ memcpy(tmp1, sb1, MD_SB_GENERIC_CONSTANT_WORDS * 4);
+ memcpy(tmp2, sb2, MD_SB_GENERIC_CONSTANT_WORDS * 4);
/*
* nr_disks is not constant
@@ -935,7 +967,25 @@ static int write_disk_sb(mdk_rdev_t * rd
}
memset(bh->b_data,0,bh->b_size);
sb = (mdp_super_t *) bh->b_data;
- memcpy(sb, rdev->sb, MD_SB_BYTES);
+ if (rdev->sb->major_version == 0 &&
+ rdev->sb->minor_version <= 90 &&
+ sizeof(mdp_old_super_t) >= sizeof(mdp_super_t)) {
+ /* Uh oh, 64bit events member moved half of the superblock */
+ memcpy(sb, rdev->sb, (long)&sb->eventslo - (long)sb);
+ put_unaligned(md_read_events(rdev->sb), &((mdp_old_super_t
+*)sb)->events);
+ memcpy(((mdp_old_super_t *)sb)->gstate_sreserved,
+rdev->sb->gstate_sreserved,
+ (long)sb + MD_SB_BYTES - (long)(((mdp_old_super_t
+*)sb)->gstate_sreserved));
+ } else
+ memcpy(sb, rdev->sb, MD_SB_BYTES);
+ sb->sb_csum = calc_sb_csum(sb);
+ if (rdev->sb->major_version > 0 ||
+ rdev->sb->minor_version > 90) {
+ int i;
+ u32 *sbp = (u32 *) sb;
+
+ for (i = 0; i < MD_SB_WORDS; i++, sbp++)
+ cpu_to_le32s(sbp);
+ }
mark_buffer_uptodate(bh, 1);
mark_buffer_dirty(bh, 1);
@@ -985,9 +1035,9 @@ static int sync_sbs(mddev_t * mddev)
if (rdev->faulty)
continue;
sb = rdev->sb;
- *sb = *mddev->sb;
+ memcpy(sb, mddev->sb, MD_SB_BYTES);
set_this_disk(mddev, rdev);
- sb->sb_csum = calc_sb_csum(sb);
+ rdev->csum_valid = 1;
}
return 0;
}
@@ -1001,9 +1051,9 @@ int md_update_sb(mddev_t * mddev)
repeat:
mddev->sb->utime = CURRENT_TIME;
- ev = get_unaligned(&mddev->sb->events);
+ ev = md_read_events(mddev->sb);
++ev;
- put_unaligned(ev,&mddev->sb->events);
+ md_write_events(ev,mddev->sb);
if (ev == (__u64)0) {
/*
* oops, this 64-bit counter should never wrap.
@@ -1012,7 +1062,7 @@ repeat:
*/
MD_BUG();
--ev;
- put_unaligned(ev,&mddev->sb->events);
+ md_write_events(ev,mddev->sb);
}
sync_sbs(mddev);
@@ -1038,7 +1088,7 @@ repeat:
printk("%s ", partition_name(rdev->dev));
if (!rdev->faulty) {
printk("[events: %08lx]",
- (unsigned long)get_unaligned(&rdev->sb->events));
+ (unsigned long)md_read_events(rdev->sb));
err += write_disk_sb(rdev);
} else
printk(")\n");
@@ -1124,7 +1174,8 @@ static int md_import_device (kdev_t newd
rdev->old_dev = MKDEV(rdev->sb->this_disk.major,
rdev->sb->this_disk.minor);
rdev->desc_nr = rdev->sb->this_disk.number;
- }
+ } else
+ rdev->csum_valid = 1;
md_list_add(&rdev->all, &all_raid_disks);
MD_INIT_LIST_HEAD(&rdev->pending);
@@ -1220,16 +1271,16 @@ static int analyze_sbs (mddev_t * mddev)
* only as a last resort. (decrease it's age by
* one event)
*/
- if (calc_sb_csum(rdev->sb) != rdev->sb->sb_csum) {
- __u64 ev = get_unaligned(&rdev->sb->events);
+ if (!rdev->csum_valid) {
+ __u64 ev = md_read_events(rdev->sb);
if (ev != (__u64)0) {
--ev;
- put_unaligned(ev,&rdev->sb->events);
+ md_write_events(ev,rdev->sb);
}
}
printk("%s's event counter: %08lx\n", partition_name(rdev->dev),
- (unsigned long)get_unaligned(&rdev->sb->events));
+ (unsigned long)md_read_events(rdev->sb));
if (!freshest) {
freshest = rdev;
continue;
@@ -1237,8 +1288,8 @@ static int analyze_sbs (mddev_t * mddev)
/*
* Find the newest superblock version
*/
- ev1 = get_unaligned(&rdev->sb->events);
- ev2 = get_unaligned(&freshest->sb->events);
+ ev1 = md_read_events(rdev->sb);
+ ev2 = md_read_events(freshest->sb);
if (ev1 != ev2) {
out_of_date = 1;
if (ev1 > ev2)
@@ -1249,7 +1300,7 @@ static int analyze_sbs (mddev_t * mddev)
printk(OUT_OF_DATE);
printk("freshest: %s\n", partition_name(freshest->dev));
}
- memcpy (sb, freshest->sb, sizeof(*sb));
+ memcpy (sb, freshest->sb, MD_SB_BYTES);
/*
* at this point we have picked the 'best' superblock
@@ -1262,8 +1313,8 @@ static int analyze_sbs (mddev_t * mddev)
* Kick all non-fresh devices faulty
*/
__u64 ev1, ev2;
- ev1 = get_unaligned(&rdev->sb->events);
- ev2 = get_unaligned(&sb->events);
+ ev1 = md_read_events(rdev->sb);
+ ev2 = md_read_events(sb);
++ev1;
if (ev1 < ev2) {
printk("md: kicking non-fresh %s from array!\n",
@@ -1283,8 +1334,8 @@ static int analyze_sbs (mddev_t * mddev)
MD_BUG();
goto abort;
}
- ev1 = get_unaligned(&rdev->sb->events);
- ev2 = get_unaligned(&sb->events);
+ ev1 = md_read_events(rdev->sb);
+ ev2 = md_read_events(sb);
ev3 = ev2;
--ev3;
if ((rdev->dev != rdev->old_dev) &&
@@ -1451,7 +1502,7 @@ abort:
static int device_size_calculation (mddev_t * mddev)
{
- int data_disks = 0, persistent;
+ int data_disks, persistent;
unsigned int readahead;
mdp_super_t *sb = mddev->sb;
struct md_list_head *tmp;
@@ -1463,6 +1514,7 @@ static int device_size_calculation (mdde
* because device size has to be modulo chunk_size)
*/
persistent = !mddev->sb->not_persistent;
+ mddev->reserved = calc_dev_reserved(mddev);
ITERATE_RDEV(mddev,rdev,tmp) {
if (rdev->faulty)
continue;
@@ -1480,23 +1532,18 @@ static int device_size_calculation (mdde
}
}
+ data_disks = 1;
switch (sb->level) {
case -3:
- data_disks = 1;
- break;
case -2:
- data_disks = 1;
- break;
- case -1:
- zoned_raid_size(mddev);
- data_disks = 1;
break;
case 0:
- zoned_raid_size(mddev);
data_disks = sb->raid_disks;
+ /* Fall through */
+ case -1:
+ zoned_raid_size(mddev);
break;
case 1:
- data_disks = 1;
break;
case 4:
case 5:
@@ -2086,7 +2133,7 @@ static int get_version (void * arg)
}
#define SET_FROM_SB(x) info.x = mddev->sb->x
-static int get_array_info (mddev_t * mddev, void * arg)
+static int get_array_info (mddev_t * mddev, unsigned int cmd, void * arg)
{
mdu_array_info_t info;
@@ -2103,6 +2150,7 @@ static int get_array_info (mddev_t * mdd
SET_FROM_SB(raid_disks);
SET_FROM_SB(md_minor);
SET_FROM_SB(not_persistent);
+ SET_FROM_SB(reserved_bytes);
SET_FROM_SB(utime);
SET_FROM_SB(state);
@@ -2114,7 +2162,10 @@ static int get_array_info (mddev_t * mdd
SET_FROM_SB(layout);
SET_FROM_SB(chunk_size);
- if (md_copy_to_user(arg, &info, sizeof(info)))
+ if (cmd == OLD_GET_ARRAY_INFO) {
+ if (md_copy_to_user(arg, &info, sizeof(mdu_old_array_info_t)))
+ return -EFAULT;
+ } else if (md_copy_to_user(arg, &info, sizeof(info)))
return -EFAULT;
return 0;
@@ -2403,7 +2454,7 @@ abort_export:
}
#define SET_SB(x) mddev->sb->x = info.x
-static int set_array_info (mddev_t * mddev, void * arg)
+static int set_array_info (mddev_t * mddev, unsigned int cmd, void * arg)
{
mdu_array_info_t info;
@@ -2413,14 +2464,30 @@ static int set_array_info (mddev_t * mdd
return -EBUSY;
}
- if (md_copy_from_user(&info, arg, sizeof(info)))
+ if (cmd == OLD_SET_ARRAY_INFO) {
+ memset(&info, 0, sizeof(info));
+ if (md_copy_from_user(&info, arg,
+ sizeof(mdu_old_array_info_t)))
+ return -EFAULT;
+ } else if (md_copy_from_user(&info, arg, sizeof(info)))
return -EFAULT;
if (alloc_array_sb(mddev))
return -ENOMEM;
mddev->sb->major_version = MD_MAJOR_VERSION;
- mddev->sb->minor_version = MD_MINOR_VERSION;
+ if (MD_MAJOR_VERSION == 0 && MD_MINOR_VERSION == 91 &&
+ sizeof(mdp_old_super_t) == sizeof(mdp_super_t) &&
+ !info.reserved_bytes)
+ /* Change between 0.90 and 0.91 is relevant only
+ * to architectures where 0.90 superblock was longer
+ * than MD_SB_BYTES (unless non-zero reserved_bytes is
+ * used), so lets do users of ia32 a favor
+ * and stay up and down compatible.
+ */
+ mddev->sb->minor_version = 90;
+ else
+ mddev->sb->minor_version = MD_MINOR_VERSION;
mddev->sb->patch_version = MD_PATCHLEVEL_VERSION;
mddev->sb->ctime = CURRENT_TIME;
@@ -2440,6 +2507,9 @@ static int set_array_info (mddev_t * mdd
SET_SB(layout);
SET_SB(chunk_size);
+ if (info.major_version > 0 || info.minor_version >= 91)
+ SET_SB(reserved_bytes);
+
mddev->sb->md_magic = MD_SB_MAGIC;
/*
@@ -2567,6 +2637,7 @@ static int md_ioctl (struct inode *inode
switch (cmd)
{
case SET_ARRAY_INFO:
+ case OLD_SET_ARRAY_INFO:
case START_ARRAY:
if (mddev) {
printk("array md%d already exists!\n",
@@ -2580,6 +2651,7 @@ static int md_ioctl (struct inode *inode
switch (cmd)
{
case SET_ARRAY_INFO:
+ case OLD_SET_ARRAY_INFO:
mddev = alloc_mddev(dev);
if (!mddev) {
err = -ENOMEM;
@@ -2593,7 +2665,7 @@ static int md_ioctl (struct inode *inode
printk("ioctl, reason %d, cmd %d\n", err, cmd);
goto abort;
}
- err = set_array_info(mddev, (void *)arg);
+ err = set_array_info(mddev, cmd, (void *)arg);
if (err) {
printk("couldnt set array info. %d\n", err);
goto abort;
@@ -2635,7 +2707,8 @@ static int md_ioctl (struct inode *inode
switch (cmd)
{
case GET_ARRAY_INFO:
- err = get_array_info(mddev, (void *)arg);
+ case OLD_GET_ARRAY_INFO:
+ err = get_array_info(mddev, cmd, (void *)arg);
goto done_unlock;
case GET_DISK_INFO:
@@ -3895,7 +3968,7 @@ md__initfunc(void do_md_setup(char *str,
chunk_size = ints[i++]; /* Chunksize */
fault = ints[i++]; /* Faultlevel */
- pers = pers | chunk_size | (fault << FAULT_SHIFT);
+ pers = pers | chunk_size | (fault << FAULT_SHIFT);
while( str && (dev = name_to_kdev_t(str))) {
do_md_add (minor, dev);
@@ -4018,7 +4091,8 @@ static void md_geninit (struct gendisk *
md_gendisk.part[i].nr_sects = 0;
}
- printk("md.c: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
+ if (sizeof(mdp_super_t) != MD_SB_BYTES)
+ panic("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
blksize_size[MD_MAJOR] = md_blocksizes;
md_set_global_readahead(md_maxreadahead);
@@ -4027,4 +4101,3 @@ static void md_geninit (struct gendisk *
proc_register(&proc_root, &proc_md);
#endif
}
-
--- linux/drivers/block/linear.c.jj Mon Oct 4 14:21:20 1999
+++ linux/drivers/block/linear.c Thu Mar 9 15:09:46 2000
@@ -151,7 +151,7 @@ static int linear_map (mddev_t *mddev, k
block, kdevname(tmp_dev->dev), tmp_dev->size, tmp_dev->offset);
*rdev = tmp_dev->dev;
- *rsector = (block - tmp_dev->offset) << 1;
+ *rsector = (block - tmp_dev->offset + mddev->reserved) << 1;
return 0;
}
--- linux/drivers/block/raid0.c.jj Mon Jan 24 11:36:42 2000
+++ linux/drivers/block/raid0.c Fri Mar 10 14:58:01 2000
@@ -143,8 +143,8 @@ static int raid0_run (mddev_t *mddev)
printk("raid0 : nb_zone is %d.\n", nb_zone);
conf->nr_zones = nb_zone;
- printk("raid0 : Allocating %d bytes for hash.\n",
- sizeof(struct raid0_hash)*nb_zone);
+ printk("raid0 : Allocating %ld bytes for hash.\n",
+ (long)sizeof(struct raid0_hash)*nb_zone);
conf->hash_table = vmalloc (sizeof (struct raid0_hash)*nb_zone);
if (!conf->hash_table)
@@ -229,7 +229,7 @@ static int raid0_map (mddev_t *mddev, kd
struct strip_zone *zone;
mdk_rdev_t *tmp_dev;
int blk_in_chunk, chunksize_bits, chunk, chunk_size;
- long block, rblock;
+ unsigned long block, rblock;
chunk_size = mddev->param.chunk_size >> 10;
chunksize_bits = ffz(~chunk_size);
@@ -237,7 +237,7 @@ static int raid0_map (mddev_t *mddev, kd
hash = conf->hash_table + block / conf->smallest->size;
if (hash - conf->hash_table > conf->nr_zones) {
- printk(KERN_DEBUG "raid0_map: invalid block %ul\n", block);
+ printk(KERN_DEBUG "raid0_map: invalid block %lu\n", block);
return -1;
}
@@ -261,7 +261,7 @@ static int raid0_map (mddev_t *mddev, kd
blk_in_chunk = block & (chunk_size -1);
chunk = (block - zone->zone_offset) / (zone->nb_dev << chunksize_bits);
tmp_dev = zone->dev[(block >> chunksize_bits) % zone->nb_dev];
- rblock = (chunk << chunksize_bits) + blk_in_chunk + zone->dev_offset;
+ rblock = (chunk << chunksize_bits) + blk_in_chunk + zone->dev_offset +
+mddev->reserved;
*rdev = tmp_dev->dev;
*rsector = rblock << 1;
--- linux/drivers/block/raid5.c.jj Mon Oct 4 14:22:12 1999
+++ linux/drivers/block/raid5.c Thu Mar 9 17:24:46 2000
@@ -586,7 +586,7 @@ static void raid5_build_block (struct st
mddev_t *mddev = conf->mddev;
char *b_data;
kdev_t dev = mddev_to_kdev(mddev);
- int block = sh->sector / (sh->size >> 9);
+ int block = sh->sector / (sh->size >> 9) + (mddev->reserved << 1);
b_data = ((volatile struct buffer_head *) bh)->b_data;
memset (bh, 0, sizeof (struct buffer_head));
@@ -1462,7 +1462,7 @@ static int __check_consistency (mddev_t
static int check_consistency (mddev_t *mddev)
{
- if (__check_consistency(mddev, 0))
+ if (__check_consistency(mddev, mddev->reserved))
/*
* We are not checking this currently, as it's legitimate to have
* an inconsistent array, at creation time.
--- linux/drivers/block/raid1.c.jj Mon Oct 4 14:22:09 1999
+++ linux/drivers/block/raid1.c Thu Mar 16 18:15:08 2000
@@ -40,7 +40,7 @@ static void * raid1_kmalloc (int size)
* simply can not afford to fail an allocation because
* there is no failure return path (eg. make_request())
*/
- while (!(ptr = kmalloc (sizeof (raid1_conf_t), GFP_KERNEL)))
+ while (!(ptr = kmalloc (size, GFP_KERNEL)))
printk ("raid1: out of memory, retrying...\n");
memset(ptr, 0, size);
@@ -266,6 +266,7 @@ static int raid1_make_request (mddev_t *
memcpy(bh_req, bh, sizeof(*bh));
bh_req->b_end_io = raid1_end_request;
bh_req->b_dev_id = r1_bh;
+ bh_req->b_rsector += (mddev->reserved << 1);
map_and_make_request (rw, bh_req);
return 0;
}
@@ -311,7 +312,7 @@ static int raid1_make_request (mddev_t *
mirror_bh[i]->b_blocknr = bh->b_blocknr;
mirror_bh[i]->b_dev = bh->b_dev;
mirror_bh[i]->b_rdev = conf->mirrors[i].dev;
- mirror_bh[i]->b_rsector = bh->b_rsector;
+ mirror_bh[i]->b_rsector = bh->b_rsector + (mddev->reserved << 1);
mirror_bh[i]->b_state = (1<<BH_Req) | (1<<BH_Dirty);
if (lowprio)
mirror_bh[i]->b_state |= (1<<BH_LowPrio);
@@ -866,7 +867,7 @@ static int __check_consistency (mddev_t
static int check_consistency (mddev_t *mddev)
{
- if (__check_consistency(mddev, 0))
+ if (__check_consistency(mddev, mddev->reserved))
/*
* we do not do this currently, as it's perfectly possible to
* have an inconsistent array when it's freshly created. Only
--- linux/include/linux/raid/md_p.h.jj Mon Oct 4 15:41:29 1999
+++ linux/include/linux/raid/md_p.h Fri Mar 10 09:55:05 2000
@@ -115,8 +115,9 @@ typedef struct mdp_superblock_s {
__u32 not_persistent; /* 12 does it have a persistent superblock */
__u32 set_uuid1; /* 13 Raid set identifier #2 */
__u32 set_uuid2; /* 14 Raid set identifier #3 */
- __u32 set_uuid3; /* 14 Raid set identifier #4 */
- __u32 gstate_creserved[MD_SB_GENERIC_CONSTANT_WORDS - 16];
+ __u32 set_uuid3; /* 15 Raid set identifier #4 */
+ __u32 reserved_bytes; /* 16 # of reserv. bytes at start of disks */
+ __u32 gstate_creserved[MD_SB_GENERIC_CONSTANT_WORDS - 17];
/*
* Generic state information
@@ -128,7 +129,8 @@ typedef struct mdp_superblock_s {
__u32 failed_disks; /* 4 Number of failed disks */
__u32 spare_disks; /* 5 Number of spare disks */
__u32 sb_csum; /* 6 checksum of the whole superblock */
- __u64 events; /* 7 number of superblock updates (64-bit!) */
+ __u32 eventslo; /* 7 number of superblock updates (low bits) */
+ __u32 eventshi; /* 8 number of superblock updates (high bits)*/
__u32 gstate_sreserved[MD_SB_GENERIC_STATE_WORDS - 9];
/*
@@ -156,6 +158,47 @@ typedef struct mdp_superblock_s {
mdp_disk_t this_disk;
} mdp_super_t;
+
+typedef struct mdp_old_superblock_s {
+ /*
+ * Constant generic information
+ */
+ __u32 gstate_c[MD_SB_GENERIC_CONSTANT_WORDS];
+
+ /*
+ * Generic state information
+ */
+ __u32 utime; /* 0 Superblock update time */
+ __u32 state; /* 1 State bits (clean, ...) */
+ __u32 active_disks; /* 2 Number of currently active disks */
+ __u32 working_disks; /* 3 Number of working disks */
+ __u32 failed_disks; /* 4 Number of failed disks */
+ __u32 spare_disks; /* 5 Number of spare disks */
+ __u32 sb_csum; /* 6 checksum of the whole superblock */
+ __u64 events; /* 7(8) number of superblock updates (64bit) */
+ __u32 gstate_sreserved[MD_SB_GENERIC_STATE_WORDS - 9];
+
+ /*
+ * Personality information
+ */
+ __u32 pstate_c[MD_SB_PERSONALITY_WORDS];
+
+ /*
+ * Disks information
+ */
+ mdp_disk_t disks[MD_SB_DISKS];
+
+ /*
+ * Reserved
+ */
+ __u32 reserved[MD_SB_RESERVED_WORDS];
+
+ /*
+ * Active descriptor
+ */
+ mdp_disk_t this_disk;
+
+} mdp_old_super_t;
#endif _MD_P_H
--- linux/include/linux/raid/md.h.jj Wed Feb 16 08:06:11 2000
+++ linux/include/linux/raid/md.h Fri Mar 10 14:30:47 2000
@@ -57,7 +57,7 @@
* Different patchlevel versions are downward and upward compatible.
*/
#define MD_MAJOR_VERSION 0
-#define MD_MINOR_VERSION 90
+#define MD_MINOR_VERSION 91
#define MD_PATCHLEVEL_VERSION 0
extern int md_size[MAX_MD_DEVS];
--- linux/include/linux/raid/md_k.h.jj Mon Oct 4 15:41:29 1999
+++ linux/include/linux/raid/md_k.h Thu Mar 9 14:15:22 2000
@@ -169,6 +169,7 @@ struct mdk_rdev_s
mdp_super_t *sb;
int sb_offset;
+ int csum_valid;
int faulty; /* if faulty do not issue IO requests */
int desc_nr; /* descriptor index in the superblock */
@@ -197,6 +198,7 @@ struct mddev_s
int sb_dirty;
mdu_param_t param;
int ro;
+ int reserved;
unsigned int curr_resync;
unsigned long resync_start;
char *name;
--- linux/include/linux/raid/md_u.h.jj Mon Oct 4 15:41:29 1999
+++ linux/include/linux/raid/md_u.h Thu Mar 9 14:50:12 2000
@@ -20,6 +20,7 @@
/* status */
#define RAID_VERSION _IOR (MD_MAJOR, 0x10, mdu_version_t)
#define GET_ARRAY_INFO _IOR (MD_MAJOR, 0x11, mdu_array_info_t)
+#define OLD_GET_ARRAY_INFO _IOR (MD_MAJOR, 0x11, mdu_old_array_info_t)
#define GET_DISK_INFO _IOR (MD_MAJOR, 0x12, mdu_disk_info_t)
#define PRINT_RAID_DEBUG _IO (MD_MAJOR, 0x13)
@@ -28,6 +29,7 @@
#define ADD_NEW_DISK _IOW (MD_MAJOR, 0x21, mdu_disk_info_t)
#define HOT_REMOVE_DISK _IO (MD_MAJOR, 0x22)
#define SET_ARRAY_INFO _IOW (MD_MAJOR, 0x23, mdu_array_info_t)
+#define OLD_SET_ARRAY_INFO _IOW (MD_MAJOR, 0x23, mdu_old_array_info_t)
#define SET_DISK_INFO _IO (MD_MAJOR, 0x24)
#define WRITE_RAID_INFO _IO (MD_MAJOR, 0x25)
#define UNPROTECT_ARRAY _IO (MD_MAJOR, 0x26)
@@ -77,9 +79,53 @@ typedef struct mdu_array_info_s {
* Personality information
*/
int layout; /* 0 the array's physical layout */
- int chunk_size; /* 1 chunk size in bytes */
+ int chunk_size; /* 1 chunk size in bytes */
+
+ int reserved_bytes; /* Number of reserved bytes at the beginning */
+
+ /*
+ * The meaning of these fields can be specified later on
+ * and will be dependent on major/minor version specified
+ * in this structure.
+ * This is so that the ioctl number does not have to change with
+ * every field addition.
+ */
+ int reserved[32 - 19];
} mdu_array_info_t;
+
+typedef struct mdu_old_array_info_s {
+ /*
+ * Generic constant information
+ */
+ int major_version;
+ int minor_version;
+ int patch_version;
+ int ctime;
+ int level;
+ int size;
+ int nr_disks;
+ int raid_disks;
+ int md_minor;
+ int not_persistent;
+
+ /*
+ * Generic state information
+ */
+ int utime; /* 0 Superblock update time */
+ int state; /* 1 State bits (clean, ...) */
+ int active_disks; /* 2 Number of currently active disks */
+ int working_disks; /* 3 Number of working disks */
+ int failed_disks; /* 4 Number of failed disks */
+ int spare_disks; /* 5 Number of spare disks */
+
+ /*
+ * Personality information
+ */
+ int layout; /* 0 the array's physical layout */
+ int chunk_size; /* 1 chunk size in bytes */
+
+} mdu_old_array_info_t;
typedef struct mdu_disk_info_s {
/*
--- linux/include/linux/raid/raid1.h.jj Fri Mar 10 14:32:31 2000
+++ linux/include/linux/raid/raid1.h Thu Mar 16 14:29:13 2000
@@ -29,8 +29,8 @@ struct raid1_private_data {
int last_used;
unsigned long next_sect;
int sect_count;
- mdk_thread_t *thread, *resync_thread;
int resync_mirrors;
+ mdk_thread_t *thread, *resync_thread;
struct mirror_info *spare;
};
--- raidtools-0.90/md-int.h.jj Tue Aug 3 10:05:53 1999
+++ raidtools-0.90/md-int.h Fri Mar 10 14:10:31 2000
@@ -20,6 +20,7 @@
/* don't include the kernel RAID header! */
#define _MD_H
+typedef unsigned long long md_u64;
typedef unsigned int md_u32;
typedef unsigned short md_u16;
typedef unsigned char md_u8;
@@ -52,6 +53,7 @@ struct md_version {
/* status */
#define RAID_VERSION _IOR (MD_MAJOR, 0x10, struct md_version)
#define GET_ARRAY_INFO _IOR (MD_MAJOR, 0x11, md_array_info_t)
+#define OLD_GET_ARRAY_INFO _IOR (MD_MAJOR, 0x11, md_old_array_info_t)
#define GET_DISK_INFO _IOR (MD_MAJOR, 0x12, md_disk_info_t)
#define PRINT_RAID_DEBUG _IO (MD_MAJOR, 0x13)
@@ -60,6 +62,8 @@ struct md_version {
#define ADD_NEW_DISK _IOW (MD_MAJOR, 0x21, md_disk_info_t)
#define HOT_REMOVE_DISK _IO (MD_MAJOR, 0x22)
#define SET_ARRAY_INFO _IOW (MD_MAJOR, 0x23, md_array_info_t)
+#define OLD_SET_ARRAY_INFO _IOW (MD_MAJOR, 0x23, md_old_array_info_t)
+
#define SET_DISK_INFO _IO (MD_MAJOR, 0x24)
#define WRITE_RAID_INFO _IO (MD_MAJOR, 0x25)
#define UNPROTECT_ARRAY _IO (MD_MAJOR, 0x26)
@@ -176,14 +180,19 @@ typedef struct md_superblock_s {
md_u32 minor_version; /* 2 minor version ... */
md_u32 patch_version; /* 3 patchlevel version ... */
md_u32 gvalid_words; /* 4 Number of used words in this section */
- md_u32 set_magic; /* 5 Raid set identifier */
+ md_u32 set_uuid0; /* 5 Raid set identifier */
md_u32 ctime; /* 6 Creation time */
md_u32 level; /* 7 Raid personality */
md_u32 size; /* 8 Apparent size of each individual disk */
md_u32 nr_disks; /* 9 total disks in the raid set */
md_u32 raid_disks; /* 10 disks in a fully functional raid set */
md_u32 md_minor; /* 11 preferred MD minor device number */
- md_u32 gstate_creserved[MD_SB_GENERIC_CONSTANT_WORDS - 12];
+ md_u32 not_persistent; /* 12 does it have a persistent superblock */
+ md_u32 set_uuid1; /* 13 Raid set identifier #2 */
+ md_u32 set_uuid2; /* 14 Raid set identifier #3 */
+ md_u32 set_uuid3; /* 15 Raid set identifier #4 */
+ md_u32 reserved_bytes; /* 16 # of reserv. bytes at start of disks */
+ md_u32 gstate_creserved[MD_SB_GENERIC_CONSTANT_WORDS - 17];
/*
* Generic state information
@@ -194,14 +203,19 @@ typedef struct md_superblock_s {
md_u32 working_disks; /* 3 Number of working disks */
md_u32 failed_disks; /* 4 Number of failed disks */
md_u32 spare_disks; /* 5 Number of spare disks */
- md_u32 gstate_sreserved[MD_SB_GENERIC_STATE_WORDS - 6];
+ md_u32 sb_csum; /* 6 checksum of the whole superblock */
+ md_u32 eventslo; /* 7 number of superblock updates (low bits) */
+ md_u32 eventshi; /* 8 number of superblock updates (high bits)*/
+ md_u32 gstate_sreserved[MD_SB_GENERIC_STATE_WORDS - 9];
/*
* Personality information
*/
md_u32 layout; /* 0 the array's physical layout */
md_u32 chunk_size; /* 1 chunk size in bytes */
- md_u32 pstate_reserved[MD_SB_PERSONALITY_WORDS - 2];
+ md_u32 root_pv; /* 2 LV root PV */
+ md_u32 root_block; /* 3 LV root block */
+ md_u32 pstate_reserved[MD_SB_PERSONALITY_WORDS - 4];
/*
* Disks information
@@ -220,6 +234,49 @@ typedef struct md_superblock_s {
} md_superblock_t;
+typedef struct md_old_superblock_s {
+ /*
+ * Constant generic information
+ */
+ md_u32 gstate_c[MD_SB_GENERIC_CONSTANT_WORDS];
+
+ /*
+ * Generic state information
+ */
+ md_u32 utime; /* 0 Superblock update time */
+ md_u32 state; /* 1 State bits (clean, ...) */
+ md_u32 active_disks; /* 2 Number of currently active disks */
+ md_u32 working_disks; /* 3 Number of working disks */
+ md_u32 failed_disks; /* 4 Number of failed disks */
+ md_u32 spare_disks; /* 5 Number of spare disks */
+ md_u32 sb_csum; /* 6 checksum of the whole superblock */
+ md_u64 events; /* 7(8) number of superblock updates (64bit) */
+ md_u32 gstate_sreserved[MD_SB_GENERIC_STATE_WORDS - 9];
+
+ /*
+ * Personality information
+ */
+ md_u32 pstate_c[MD_SB_PERSONALITY_WORDS];
+
+ /*
+ * Disks information
+ */
+ md_descriptor_t disks[MD_SB_DISKS];
+
+ /*
+ * Reserved
+ */
+ md_u32 reserved[MD_SB_RESERVED_WORDS];
+
+ /*
+ * Active descriptor
+ */
+ md_descriptor_t this_disk;
+
+} md_old_superblock_t;
+
+
+
/*
* options passed in raidstart:
*/
@@ -264,7 +321,51 @@ typedef struct md_array_info_s {
md_u32 layout; /* 0 the array's physical layout */
md_u32 chunk_size; /* 1 chunk size in bytes */
+ md_u32 reserved_bytes; /* Number of reserved bytes at the beginning */
+
+ /*
+ * The meaning of these fields can be specified later on
+ * and will be dependent on major/minor version specified
+ * in this structure.
+ * This is so that the ioctl number does not have to change with
+ * every field addition.
+ */
+ md_u32 reserved[32 - 19];
+
} md_array_info_t;
+
+typedef struct md_old_array_info_s {
+ /*
+ * Generic constant information
+ */
+ md_u32 major_version;
+ md_u32 minor_version;
+ md_u32 patch_version;
+ md_u32 ctime;
+ md_u32 level;
+ md_u32 size;
+ md_u32 nr_disks;
+ md_u32 raid_disks;
+ md_u32 md_minor;
+ md_u32 not_persistent;
+
+ /*
+ * Generic state information
+ */
+ md_u32 utime; /* 0 Superblock update time */
+ md_u32 state; /* 1 State bits (clean, ...) */
+ md_u32 active_disks; /* 2 Number of currently active disks */
+ md_u32 working_disks; /* 3 Number of working disks */
+ md_u32 failed_disks; /* 4 Number of failed disks */
+ md_u32 spare_disks; /* 5 Number of spare disks */
+
+ /*
+ * Personality information
+ */
+ md_u32 layout; /* 0 the array's physical layout */
+ md_u32 chunk_size; /* 1 chunk size in bytes */
+
+} md_old_array_info_t;
typedef struct md_disk_info_s {
/*
--- raidtools-0.90/common.h.jj Tue Aug 3 10:05:53 1999
+++ raidtools-0.90/common.h Thu Feb 24 12:16:29 2000
@@ -39,7 +39,7 @@ typedef int kdev_t;
#define RAID_CONFIG "/etc/raidtab"
#define MKRAID_MAJOR_VERSION (0)
-#define MKRAID_MINOR_VERSION (90)
+#define MKRAID_MINOR_VERSION (91)
#define MKRAID_PATCHLEVEL_VERSION (0)
extern int do_quiet_flag;
--- raidtools-0.90/raid_io.c.jj Tue Aug 3 10:05:53 1999
+++ raidtools-0.90/raid_io.c Fri Mar 10 17:56:31 2000
@@ -16,6 +16,7 @@
#include <linux/fs.h> /* for BLKGETSIZE */
#endif
#include <sys/sysmacros.h>
+#include <endian.h>
#ifndef BLOCK_SIZE
#define BLOCK_SIZE 1024
@@ -34,6 +35,35 @@
md_cfg_entry_t *p;
md_superblock_t *sb;
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define raid_le16(x) (md_u16)(x)
+#define raid_le32(x) (md_u32)(x)
+#define raid_be16(x) \
+ (md_u16)( \
+ (((md_u16)(x) & (md_u16)0xff00) >> 8) | \
+ (((md_u16)(x) & (md_u16)0x00ff) << 8))
+#define raid_be32(x) \
+ ((md_u32)( \
+ (((md_u32)(x) & (md_u32)0x000000ffUL) << 24) | \
+ (((md_u32)(x) & (md_u32)0x0000ff00UL) << 8) | \
+ (((md_u32)(x) & (md_u32)0x00ff0000UL) >> 8) | \
+ (((md_u32)(x) & (md_u32)0xff000000UL) >> 24) ))
+#elif __BYTE_ORDER == __BIG_ENDIAN
+#define raid_le16(x) \
+ (md_u16)( \
+ (((md_u16)(x) & (md_u16)0xff00) >> 8) | \
+ (((md_u16)(x) & (md_u16)0x00ff) << 8))
+#define raid_le32(x) \
+ ((md_u32)( \
+ (((md_u32)(x) & (md_u32)0x000000ffUL) << 24) | \
+ (((md_u32)(x) & (md_u32)0x0000ff00UL) << 8) | \
+ (((md_u32)(x) & (md_u32)0x00ff0000UL) >> 8) | \
+ (((md_u32)(x) & (md_u32)0xff000000UL) >> 24) ))
+#define raid_be16(x) (md_u16)(x)
+#define raid_be32(x) (md_u32)(x)
+#else
+#error Unknown byte order
+#endif
#define TIME long long
@@ -81,7 +111,7 @@ void progress (unsigned long blocks, uns
}
#undef F
-#if !(defined(__alpha__) || defined(__sparc_v9__))
+#if !(defined(__alpha__) || (defined(__sparc__) && defined(__arch64__)))
# ifndef __NR__llseek
# ifdef __sparc__
# define __NR__llseek 236
@@ -105,7 +135,7 @@ long long raidseek (unsigned int fd, uns
long long result;
int retval;
-#if defined(__alpha__) || defined(__sparc_v9__)
+#if defined(__alpha__) || (defined(__sparc__) && defined(__arch64__))
return lseek(fd, offset, SEEK_SET);
#else
retval = _llseek (fd, ((unsigned long long) offset) >> 32,
@@ -115,11 +145,39 @@ long long raidseek (unsigned int fd, uns
#endif
}
-int upgrade_sb (int fd, md_superblock_t *sb, md_cfg_entry_t * cfg, int verbose)
+/* Check whether the device behind fd starts with an embedded partition table,
+ * so the caller can reserve those bytes and keep the array from smashing it.
+ * Only a Sun disk label is detected (big-endian 0xDABE in word 254, XOR of all
+ * 256 words zero): returns 1024 and sets *name; else 0, or -1 on I/O error. */
+int check_partition_table (int fd, char **name)
+{
+ md_u16 sun_disk_label[256];
+
+ if (raidseek(fd, 0) == -1)
+ return -1;
+ if (read(fd, sun_disk_label, sizeof(sun_disk_label))
+ != sizeof(sun_disk_label))
+ return -1;
+ if (raid_be16(sun_disk_label[254]) == 0xDABE) {
+ md_u16 csum, *p;
+
+ for (csum = 0, p = sun_disk_label;
+ p < sun_disk_label + 256; p++)
+ csum ^= *p;
+ if (!csum) {
+ *name = "Sun disk label";
+ return 1024;
+ }
+ }
+ return 0;
+}
+
+static int upgrade_sb (int fd, md_superblock_t *sb, md_cfg_entry_t * cfg, int verbose)
{
struct stat stat_buf;
md_descriptor_t *disk;
int i;
+ int mkraid_minor_version = MKRAID_MINOR_VERSION;
if (
(sb->major_version == MKRAID_MAJOR_VERSION) &&
@@ -130,6 +188,22 @@ int upgrade_sb (int fd, md_superblock_t
}
if (
+ (MKRAID_MAJOR_VERSION == 0) &&
+ (MKRAID_MINOR_VERSION == 91) &&
+ (sb->major_version == MKRAID_MAJOR_VERSION) &&
+ (sizeof(md_superblock_t) == sizeof(md_old_superblock_t))) {
+ if (sb->minor_version == 90) {
+ /* No need to upgrade from 0.90 to 0.91 on ia32 and
+ * other archs where nothing moves between those
+ * two versions. */
+ fprintf(stderr, "array needs no upgrade\n");
+ return 1;
+ }
+ if (sb->minor_version < 90)
+ mkraid_minor_version = 90;
+ }
+
+ if (
(sb->major_version > MKRAID_MAJOR_VERSION) ||
((sb->major_version == MKRAID_MAJOR_VERSION) &&
(sb->minor_version > MKRAID_MINOR_VERSION)) ||
@@ -143,18 +217,18 @@ int upgrade_sb (int fd, md_superblock_t
if (verbose) {
printf("MD ID: %x\n", sb->md_magic);
printf("Changing MD version from %d.%d.%d to %d.%d.%d.\n",
- sb->major_version, sb->minor_version,sb->patch_version,
- MKRAID_MAJOR_VERSION, MKRAID_MINOR_VERSION,
+ sb->major_version, sb->minor_version, sb->patch_version,
+ MKRAID_MAJOR_VERSION, mkraid_minor_version,
MKRAID_PATCHLEVEL_VERSION);
}
sb->major_version = MKRAID_MAJOR_VERSION;
- sb->minor_version = MKRAID_MINOR_VERSION;
+ sb->minor_version = mkraid_minor_version;
sb->patch_version = MKRAID_PATCHLEVEL_VERSION;
if (verbose)
if ((sb->major_version > 0) || (sb->minor_version >= 50))
- printf("preferred minor %d (md%d)\n", sb->md_minor, sb->md_minor);
+ printf("preferred minor %d (md%d)\n", sb->md_minor,
+sb->md_minor);
if (stat(cfg->md_name,&stat_buf)) {
fprintf(stderr, "%s: file doesn't exist!\n", cfg->md_name);
return 1;
@@ -224,7 +298,7 @@ void print_sb (md_superblock_t *sb)
sb->md_minor);
printf("gvalid_words: %d\n", sb->gvalid_words);
- printf("Raid set ID: %x\n", sb->set_magic);
+ printf("Raid set ID: %x %x %x %x\n", sb->set_uuid0, sb->set_uuid1,
+sb->set_uuid2, sb->set_uuid3);
t = (time_t) sb->ctime;
printf("Creation time: %s", ctime(&t));
t = (time_t) sb->utime;
@@ -245,6 +319,7 @@ void print_sb (md_superblock_t *sb)
printf("Number of working disks: %d\n", sb->working_disks);
printf("Number of failed disks: %d\n", sb->failed_disks);
printf("Number of spare disks: %d\n", sb->spare_disks);
+ printf("Reserved bytes: %d\n", sb->reserved_bytes);
printf("\n");
for (i = 0; i < sb->nr_disks; i++) {
@@ -262,13 +337,16 @@ void print_sb (md_superblock_t *sb)
}
}
-static int sanity_checks (char *name, int fd, int sb_offset,
- int forceSanity, int upgradeArray, md_cfg_entry_t * cfg, int dowrite)
+static int sanity_checks (struct md_version * ver, char *name, int fd,
+ int sb_offset, int forceSanity, int upgradeArray,
+ md_cfg_entry_t * cfg, int dowrite)
{
FILE *fp;
unsigned char tmp[MAX_LINE_LENGTH];
unsigned char buffer[MD_SB_BYTES];
md_superblock_t *phys_sb;
+ char *part_name;
+ int reserve;
/*
* Check if the device is mounted
@@ -288,14 +366,41 @@ static int sanity_checks (char *name, in
fclose(fp);
if (!upgradeArray) {
- if (forceSanity)
- return 0;
if (cfg->array.param.not_persistent)
/*
* We have no business analyzing the contents
* of a superblock-less array.
*/
return 0;
+
+ reserve = check_partition_table(fd, &part_name);
+ if (reserve < 0) {
+ fprintf(stderr, "%s: couldn't read from the start of the
+disk\n", name);
+ return 1;
+ }
+
+ if (cfg->array.param.reserved_bytes & 1) {
+ /* reserved-bytes was not mentioned in the config file */
+ if (reserve && !ver->major && ver->minor <= 90) {
+ if (forceSanity)
+ return 0;
+ fprintf(stderr, "%s appears to contain an embedded %s
+partition table.\n"
+ "Use -f to override.\n", name,
+part_name);
+ return 1;
+ } else if (reserve > (cfg->array.param.reserved_bytes & ~1)) {
+ printf("%s appears to contain an embedded %s partition
+table.\n"
+ "Assuming %d reserved-bytes.\n", name,
+part_name, reserve);
+ cfg->array.param.reserved_bytes = reserve | 1;
+ }
+ } else if (reserve > cfg->array.param.reserved_bytes && !forceSanity) {
+ fprintf(stderr, "%s appears to contain an embedded %s
+partition table which needs\n"
+ "%d reserved bytes, while only %d
+reserved-bytes was requested.\n",
+ name, part_name, reserve,
+cfg->array.param.reserved_bytes);
+ return 1;
+ }
+
+ if (forceSanity)
+ return 0;
/*
* Check if the device contains an ext2 filesystem
*/
@@ -312,7 +417,10 @@ static int sanity_checks (char *name, in
if ((read(fd, buffer, MD_SB_BYTES)) != MD_SB_BYTES)
return 1;
phys_sb = (md_superblock_t *) buffer;
- if (phys_sb->md_magic == MD_SB_MAGIC) {
+ if (phys_sb->md_magic == MD_SB_MAGIC ||
+ (raid_le32(phys_sb->md_magic) == MD_SB_MAGIC &&
+ (raid_le32(phys_sb->major_version) > 0 ||
+ raid_le32(phys_sb->minor_version) > 90))) {
fprintf(stderr, "%s appears to be already part of a raid array
-- use -f to\nforce the destruction of the old superblock\n", name);
return 1;
}
@@ -328,7 +436,29 @@ static int sanity_checks (char *name, in
if ((read(fd, buffer, MD_SB_BYTES)) != MD_SB_BYTES)
return 1;
phys_sb = (md_superblock_t *) buffer;
+ if (phys_sb->md_magic != MD_SB_MAGIC &&
+ raid_le32(phys_sb->md_magic) == MD_SB_MAGIC &&
+ (raid_le32(phys_sb->major_version) > 0 ||
+ raid_le32(phys_sb->minor_version) > 90)) {
+ md_u32 *p;
+
+ for (p = (md_u32 *)phys_sb; p < (md_u32 *)(phys_sb + 1); p++)
+ *p = raid_le32(*p);
+ }
if (phys_sb->md_magic == MD_SB_MAGIC) {
+ if (phys_sb->major_version == 0 && phys_sb->minor_version == 90 &&
+ sizeof(md_superblock_t) != sizeof(md_old_superblock_t)) {
+ /* Duh, backwards compatibility. */
+ md_u64 events;
+
+ memmove(&events, &((md_old_superblock_t *)phys_sb)->events,
+sizeof(md_u64));
+ phys_sb->eventslo = events;
+ phys_sb->eventshi = events >> 32;
+ memmove(phys_sb->gstate_sreserved,
+ ((md_old_superblock_t *)phys_sb)->gstate_sreserved,
+ (long)phys_sb + MD_SB_BYTES -
+(long)(((md_old_superblock_t *)phys_sb)->gstate_sreserved));
+ }
+
if (dowrite) {
fprintf(stderr, "upgrading superblock on %s ...\n",
name);
@@ -338,15 +468,24 @@ static int sanity_checks (char *name, in
if (upgrade_sb(fd, phys_sb, cfg, dowrite))
return 1;
if (dowrite) {
+ int minor_version = phys_sb->minor_version;
fprintf(stderr, "new superblock:\n");
print_sb(phys_sb);
if (raidseek(fd, sb_offset) == -1)
return 1;
+ if ((phys_sb->major_version ||
+ phys_sb->minor_version > 90) &&
+ MD_SB_MAGIC != raid_le32(MD_SB_MAGIC)) {
+ md_u32 *p;
+
+ for (p = (md_u32 *)phys_sb; p < (md_u32 *)(phys_sb +
+1); p++)
+ *p = raid_le32(*p);
+ }
if ((write(fd, buffer, MD_SB_BYTES)) != MD_SB_BYTES) {
fprintf(stderr, "could not write new superblock!\n");
return 1;
}
- printf("sb->minor after write: %d\n", phys_sb->minor_version);
+ printf("sb->minor after write: %d\n", minor_version);
fsync(fd);
}
return 0;
@@ -459,11 +598,10 @@ int analyze_sb (struct md_version * ver,
close(fd);
return 1;
}
-
cfg->sb_block_offset[i] = MD_NEW_SIZE_BLOCKS(nr_blocks);
if (!cfg->array.param.not_persistent) {
printf("disk %d: %s, %ukB, raid superblock at %dkB\n",
i, cfg->device_name[i], nr_blocks, cfg->sb_block_offset[i]);
- if (sanity_checks(cfg->device_name[i], fd,
+ if (sanity_checks(ver, cfg->device_name[i], fd,
cfg->sb_block_offset[i], forceSanity,
upgradeArray, cfg, 0)) {
close(fd);
@@ -475,6 +613,15 @@ int analyze_sb (struct md_version * ver,
printf("disk %d: %s, failed\n", i, cfg->device_name[i]);
}
}
+
+ if (array->param.reserved_bytes & 1)
+ array->param.reserved_bytes &= ~1;
+ if (array->param.reserved_bytes && !ver->major && ver->minor <= 90) {
+ fprintf(stderr, "Non-zero reserved-bytes (%d) is only supported by
+kernel RAID driver %d.%d.%d\n",
+ array->param.reserved_bytes, ver->major, ver->minor,
+ver->patchlevel);
+ return 1;
+ }
+
/*
* second pass, write stuff out ...
*/
@@ -502,7 +649,7 @@ int analyze_sb (struct md_version * ver,
}
cfg->sb_block_offset[i] = MD_NEW_SIZE_BLOCKS(nr_blocks);
- if (sanity_checks(cfg->device_name[i], fd,
+ if (sanity_checks(ver, cfg->device_name[i], fd,
cfg->sb_block_offset[i], forceSanity,
upgradeArray, cfg, 1)) {
close(fd);
--- raidtools-0.90/mkraid.c.jj Tue Aug 3 10:05:53 1999
+++ raidtools-0.90/mkraid.c Fri Mar 10 14:02:21 2000
@@ -19,7 +19,7 @@
#include "popt.h"
#include "version.h"
-void printcfg (md_cfg_entry_t * cfg);
+static void printcfg (md_cfg_entry_t * cfg);
void usage (void) {
printf("usage: mkraid [--configfile] [--version] [--force] [--upgrade]\n");
@@ -43,7 +43,10 @@ int i, ret, file;
#endif
file = open(cfg->md_name,O_RDONLY);
- ret = ioctl(file, SET_ARRAY_INFO, (unsigned long)&cfg->array.param);
+ if (ver->major || ver->minor > 90)
+ ret = ioctl(file, SET_ARRAY_INFO, (unsigned long)&cfg->array.param);
+ else
+ ret = ioctl(file, OLD_SET_ARRAY_INFO, (unsigned long)&cfg->array.param);
if (ret)
return 1;
@@ -275,7 +278,7 @@ abort:
#define P(x) printf("%18s: \t %d\n",#x,cfg->array.param.x)
#define DP(x) printf("%18s: \t %d\n",#x,cfg->array.disks[i].x)
-void printcfg (md_cfg_entry_t * cfg)
+static void printcfg (md_cfg_entry_t * cfg)
{
int i;
@@ -298,6 +301,7 @@ void printcfg (md_cfg_entry_t * cfg)
P(layout);
P(chunk_size);
+ P(reserved_bytes);
for (i = 0; i < cfg->array.param.nr_disks; i++) {
printf("\n");
--- raidtools-0.90/parser.c.jj Tue Aug 3 10:05:53 1999
+++ raidtools-0.90/parser.c Fri Mar 10 11:36:33 2000
@@ -68,6 +68,7 @@ static int process_entry (char *par, cha
}
strcpy(cfg->md_name, val_s);
cfg->array.param.nr_disks = 0;
+ cfg->array.param.reserved_bytes = 1; /* Autodetect */
last = cfg_head;
while (last && last->next) last = last->next;
@@ -143,6 +144,13 @@ static int process_entry (char *par, cha
return 1;
}
array->param.chunk_size = val * MD_BLK_SIZ;
+ return 0;
+ } else if (strcmp(par, "reserved-bytes") == 0) {
+ if (val & 511) {
+ fprintf(stderr, "reserved-bytes %d must be a multiple of 512\n",
+val);
+ return 1;
+ }
+ array->param.reserved_bytes = val;
return 0;
} else if (strcmp(par, "device") == 0) {
if (array->param.nr_disks == MD_SB_DISKS) {
--- raidtools-0.90/raid5.conf.sample.jj Tue Aug 3 10:05:53 1999
+++ raidtools-0.90/raid5.conf.sample Fri Mar 10 10:06:30 2000
@@ -3,6 +3,7 @@ raiddev /dev/md0
raid-level 5
nr-raid-disks 3
chunk-size 4
+#reserved-bytes 1024
# Parity placement algorithm
--- raidtools-0.90/raid1.conf.sample.jj Tue Aug 3 10:05:53 1999
+++ raidtools-0.90/raid1.conf.sample Fri Mar 10 10:06:46 2000
@@ -4,6 +4,7 @@ raid-level 1
nr-raid-disks 2
nr-spare-disks 0
chunk-size 4
+#reserved-bytes 1024
device /dev/hda1
raid-disk 0
--- raidtools-0.90/raidtab.5.jj Tue Aug 3 10:05:53 1999
+++ raidtools-0.90/raidtab.5 Thu Mar 16 16:20:36 2000
@@ -106,11 +106,20 @@ performance on typical disks with rotati
.TP
\fBchunk-size \fIsize\fR
-Sets the stripe size to \fIsize\fR bytes. Has to be a power of 2 and
+Sets the stripe size to \fIsize\fR kilobytes. Has to be a power of 2 and
has a compilation-time maximum of 4M. (MAX_CHUNK_SIZE in the kernel
driver) typical values are anything from 4k to 128k, the best value
should be determined by experimenting on a given array, alot depends
on the SCSI and disk configuration.
+
+.TP
+\fBreserved-bytes \fIsize\fR
+Reserves at least the first \fIsize\fR bytes on each disk, so that
+this area can hold e.g. embedded partition tables or bootblocks.
+Without this, the RAID array can happily resync them with something else.
+If reserved-bytes is not specified, then mkraid checks if it finds some
+known partition table or bootblock magic and sets the default accordingly.
+\fIsize\fR must be a multiple of 512.
.TP
\fBdevice \fIdevpath\fR
--- raidtools-0.90/raidtab.sample.jj Tue Aug 3 10:05:53 1999
+++ raidtools-0.90/raidtab.sample Thu Mar 16 16:16:16 2000
@@ -2,7 +2,8 @@
# sample raiddev configuration file
#
-# 'persistent' RAID5 setup, with no spare disks:
+# 'persistent' RAID5 setup, with no spare disks
+# and 4KB chunk size
#
raiddev /dev/md0
raid-level 5
@@ -51,3 +52,23 @@ raiddev /dev/md2
device /dev/sdc1
spare-disk 0
+
+#
+# 'persistent' RAID5 setup, with no spare disks
+# with 1024 bytes at the start of each disk reserved
+# for bootblocks or other things
+#
+raiddev /dev/md3
+ raid-level 5
+ nr-raid-disks 3
+ nr-spare-disks 0
+ persistent-superblock 1
+ chunk-size 4
+ reserved-bytes 1024
+
+ device /dev/sdb1
+ raid-disk 0
+ device /dev/sda1
+ raid-disk 1
+ device /dev/sdc1
+ raid-disk 2