Module Name:	src
Committed By:	oster
Date:		Sun Sep 17 20:07:39 UTC 2023
Modified Files:
	src/sbin/raidctl: raidctl.8 raidctl.c
	src/sys/dev/raidframe: raidframeio.h rf_diskqueue.c rf_diskqueue.h
	    rf_disks.c rf_disks.h rf_driver.c rf_netbsdkintf.c rf_raid.h
	    rf_reconstruct.c

Log Message:
Implement hot removal of spares and components.  From manu@.

Implement a long-desired feature: automatically incorporating a used
spare into the array after a reconstruct.  Given the configuration:

 Components:
           /dev/wd0e: failed
           /dev/wd1e: optimal
           /dev/wd2e: optimal
 Spares:
           /dev/wd3e: spare

running 'raidctl -F /dev/wd0e raid0' will now result in the following
configuration after a successful rebuild:

 Components:
           /dev/wd3e: optimal
           /dev/wd1e: optimal
           /dev/wd2e: optimal
 No spares.

Thanks to manu@ for developing the initial set of changes that allowed
the automatic incorporation of a used spare to come to fruition, and
for useful discussions about, and additional testing of, these changes.


To generate a diff of this commit:
cvs rdiff -u -r1.79 -r1.80 src/sbin/raidctl/raidctl.8
cvs rdiff -u -r1.78 -r1.79 src/sbin/raidctl/raidctl.c
cvs rdiff -u -r1.11 -r1.12 src/sys/dev/raidframe/raidframeio.h
cvs rdiff -u -r1.63 -r1.64 src/sys/dev/raidframe/rf_diskqueue.c
cvs rdiff -u -r1.29 -r1.30 src/sys/dev/raidframe/rf_diskqueue.h
cvs rdiff -u -r1.93 -r1.94 src/sys/dev/raidframe/rf_disks.c
cvs rdiff -u -r1.14 -r1.15 src/sys/dev/raidframe/rf_disks.h
cvs rdiff -u -r1.140 -r1.141 src/sys/dev/raidframe/rf_driver.c
cvs rdiff -u -r1.413 -r1.414 src/sys/dev/raidframe/rf_netbsdkintf.c
cvs rdiff -u -r1.51 -r1.52 src/sys/dev/raidframe/rf_raid.h
cvs rdiff -u -r1.128 -r1.129 src/sys/dev/raidframe/rf_reconstruct.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
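For illustration, the new removal path can also be driven directly through
the ioctl this commit adds.  The following minimal userland sketch mirrors
what 'raidctl -r' does in the diffs below; the device path, the target
column, and the component name are hypothetical, and error handling is
reduced to err(3):

    #include <sys/types.h>
    #include <sys/ioctl.h>

    #include <dev/raidframe/raidframevar.h>
    #include <dev/raidframe/raidframeio.h>

    #include <err.h>
    #include <fcntl.h>
    #include <string.h>
    #include <unistd.h>

    int
    main(void)
    {
    	RF_SingleComponent_t comp;
    	int fd;

    	/* Hypothetical raw RAID device; the partition letter varies
    	   by port, and raidctl(8) resolves the name for you. */
    	fd = open("/dev/rraid0d", O_RDWR);
    	if (fd == -1)
    		err(1, "open");

    	memset(&comp, 0, sizeof(comp));
    	/* Columns numCol..numCol+numSpare-1 name the spares; plain
    	   column numbers name components.  Column 3 is hypothetical. */
    	comp.row = 0;
    	comp.column = 3;
    	strlcpy(comp.component_name, "/dev/wd3e",
    	    sizeof(comp.component_name));

    	/* The kernel refuses with EBUSY unless the component is in
    	   the failed, spare, or spared state (see raidctl.8 below). */
    	if (ioctl(fd, RAIDFRAME_REMOVE_COMPONENT, &comp) == -1)
    		err(1, "RAIDFRAME_REMOVE_COMPONENT");

    	close(fd);
    	return 0;
    }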
Modified files:

Index: src/sbin/raidctl/raidctl.8
diff -u src/sbin/raidctl/raidctl.8:1.79 src/sbin/raidctl/raidctl.8:1.80
--- src/sbin/raidctl/raidctl.8:1.79	Tue Jun 14 08:06:18 2022
+++ src/sbin/raidctl/raidctl.8	Sun Sep 17 20:07:39 2023
@@ -1,4 +1,4 @@
-.\" $NetBSD: raidctl.8,v 1.79 2022/06/14 08:06:18 kre Exp $
+.\" $NetBSD: raidctl.8,v 1.80 2023/09/17 20:07:39 oster Exp $
 .\"
 .\" Copyright (c) 1998, 2002 The NetBSD Foundation, Inc.
 .\" All rights reserved.
@@ -53,7 +53,7 @@
 .\" any improvements or extensions that they make and grant Carnegie the
 .\" rights to redistribute these changes.
 .\"
-.Dd June 13, 2022
+.Dd September 16, 2023
 .Dt RAIDCTL 8
 .Os
 .Sh NAME
@@ -322,9 +322,10 @@ if necessary, and immediately begins a r
 This is useful for reconstructing back onto a component after
 it has been replaced following a failure.
 .It Fl r Ar component Ar dev
-Remove the spare disk specified by
+Remove the specified
 .Ar component
-from the set of available spare components.
+from the RAID. The component must be in the failed, spare, or spared state
+in order to be removed.
 .It Fl S Ar dev
 Check the status of parity re-writing, component reconstruction, and
 component copyback.
@@ -1005,10 +1006,9 @@ option will show:
 .Bd -literal -offset indent
 Components:
           /dev/sd1e: optimal
-          /dev/sd2e: spared
+          /dev/sd4e: optimal
           /dev/sd3e: optimal
-Spares:
-          /dev/sd4e: used_spare
+No spares.
 [...]
 Parity status: clean
 Reconstruction is 100% complete.
@@ -1016,62 +1016,10 @@ Parity Re-write is 100% complete.
 Copyback is 100% complete.
 .Ed
 .Pp
-At this point there are at least two options.
-First, if
-.Pa /dev/sd2e
-is known to be good (i.e., the failure was either caused by
-.Fl f
-or
-.Fl F ,
-or the failed disk was replaced), then a copyback of the data can
-be initiated with the
-.Fl B
-option.
-In this example, this would copy the entire contents of
-.Pa /dev/sd4e
-to
-.Pa /dev/sd2e .
-Once the copyback procedure is complete, the
-status of the device would be (in part):
-.Bd -literal -offset indent
-Components:
-          /dev/sd1e: optimal
-          /dev/sd2e: optimal
-          /dev/sd3e: optimal
-Spares:
-          /dev/sd4e: spare
-.Ed
-.Pp
-and the system is back to normal operation.
-.Pp
-The second option after the reconstruction is to simply use
-.Pa /dev/sd4e
-in place of
-.Pa /dev/sd2e
-in the configuration file.
-For example, the configuration file (in part) might now look like:
-.Bd -literal -offset indent
-START array
-3 0
-
-START disks
-/dev/sd1e
-/dev/sd4e
-/dev/sd3e
-.Ed
-.Pp
-This can be done as
-.Pa /dev/sd4e
-is completely interchangeable with
+as
 .Pa /dev/sd2e
-at this point.
-Note that extreme care must be taken when
-changing the order of the drives in a configuration.
-This is one of the few instances where the devices and/or
-their orderings can be changed without loss of data!
-In general, the ordering of components in a configuration file should
-.Em never
-be changed.
+has been removed and replaced with
+.Pa /dev/sd4e .
 .Pp
 If a component fails and there are no hot spares
 available on-line, the status of the RAID set might (in part) look like:
@@ -1101,7 +1049,7 @@ Spares:
 .Pp
 Reconstruction could then take place using
 .Fl F
-as describe above.
+as described above.
 .Pp
 A second option is to rebuild directly onto
 .Pa /dev/sd2e .
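The new post-rebuild status shown above is produced by swapping the
reconstructed spare into the failed component's slot and then compacting
the spare tail of the disk array.  Here is a self-contained toy model of
that pass, using the device names from the manual page example; struct
disk and swap() are illustrative stand-ins, not kernel types, and the
real code is rf_swap_components() plus the loops in rf_disks.c and
rf_reconstruct.c below:

    #include <stdio.h>

    #define NCOL   3	/* components */
    #define NSPARE 1	/* hot spares */

    /* illustrative stand-in for RF_RaidDisk_t */
    struct disk {
    	const char *name;
    	const char *status;
    };

    static void
    swap(struct disk *d, int a, int b)
    {
    	struct disk t = d[a];
    	d[a] = d[b];
    	d[b] = t;
    }

    int
    main(void)
    {
    	struct disk d[NCOL + NSPARE] = {
    		{ "/dev/sd1e", "optimal" },
    		{ "/dev/sd2e", "failed" },	/* col = 1 has failed */
    		{ "/dev/sd3e", "optimal" },
    		{ "/dev/sd4e", "used_spare" },	/* scol = NCOL + 0 */
    	};
    	int col = 1, scol = NCOL, nspare = NSPARE, i;

    	/* After a successful rebuild onto the spare: take its slot... */
    	swap(d, col, scol);
    	d[col].status = "optimal";
    	/* ...and push the hole to the end of the spare region. */
    	for (i = scol; i < NCOL + nspare - 1; i++)
    		swap(d, i, i + 1);
    	nspare--;

    	for (i = 0; i < NCOL + nspare; i++)
    		printf("%s: %s\n", d[i].name, d[i].status);
    	return 0;
    }

This prints sd1e/sd4e/sd3e all optimal, matching the manual page example.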
Index: src/sbin/raidctl/raidctl.c
diff -u src/sbin/raidctl/raidctl.c:1.78 src/sbin/raidctl/raidctl.c:1.79
--- src/sbin/raidctl/raidctl.c:1.78	Tue Jun 14 08:06:18 2022
+++ src/sbin/raidctl/raidctl.c	Sun Sep 17 20:07:39 2023
@@ -1,4 +1,4 @@
-/* $NetBSD: raidctl.c,v 1.78 2022/06/14 08:06:18 kre Exp $ */
+/* $NetBSD: raidctl.c,v 1.79 2023/09/17 20:07:39 oster Exp $ */
 
 /*-
  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
@@ -39,7 +39,7 @@
 
 #include <sys/cdefs.h>
 #ifndef lint
-__RCSID("$NetBSD: raidctl.c,v 1.78 2022/06/14 08:06:18 kre Exp $");
+__RCSID("$NetBSD: raidctl.c,v 1.79 2023/09/17 20:07:39 oster Exp $");
 #endif
 
 
@@ -64,6 +64,10 @@ __RCSID("$NetBSD: raidctl.c,v 1.78 2022/
 #include "rf_configure.h"
 #include "prog_ops.h"
 
+#ifndef RAIDFRAME_REMOVE_COMPONENT
+#define RAIDFRAME_REMOVE_COMPONENT RAIDFRAME_REMOVE_HOT_SPARE
+#endif
+
 #define CONFIGURE_TEST 1	/* must be different from any raidframe ioctl */
 
 void	do_ioctl(int, u_long, void *, const char *);
@@ -79,7 +83,7 @@ static void set_component_label(int, ch
 static  void init_component_labels(int, int);
 static  void set_autoconfig(int, int, char *);
 static  void add_hot_spare(int, char *);
-static  void remove_hot_spare(int, char *);
+static  void remove_component(int, char *);
 static  void rebuild_in_place(int, char *);
 static  void check_status(int,int);
 static  void check_parity(int,int, char *);
@@ -236,7 +240,7 @@ main(int argc,char *argv[])
			num_options++;
			break;
		case 'r':
-			action = RAIDFRAME_REMOVE_HOT_SPARE;
+			action = RAIDFRAME_REMOVE_COMPONENT;
			get_comp(component, optarg, sizeof(component));
			num_options++;
			break;
@@ -319,8 +323,8 @@ main(int argc,char *argv[])
	case RAIDFRAME_ADD_HOT_SPARE:
		add_hot_spare(fd, component);
		break;
-	case RAIDFRAME_REMOVE_HOT_SPARE:
-		remove_hot_spare(fd, component);
+	case RAIDFRAME_REMOVE_COMPONENT:
+		remove_component(fd, component);
		break;
	case RAIDFRAME_CONFIGURE:
		rf_configure(fd, config_filename, force);
@@ -918,22 +922,22 @@ add_hot_spare(int fd, char *component)
 }
 
 static void
-remove_hot_spare(int fd, char *component)
+remove_component(int fd, char *component)
 {
-	RF_SingleComponent_t hot_spare;
+	RF_SingleComponent_t comp;
	int component_num;
	int num_cols;
 
	get_component_number(fd, component, &component_num, &num_cols);
 
-	hot_spare.row = component_num / num_cols;
-	hot_spare.column = component_num % num_cols;
+	comp.row = component_num / num_cols;
+	comp.column = component_num % num_cols;
 
-	strncpy(hot_spare.component_name, component,
-	    sizeof(hot_spare.component_name));
+	strncpy(comp.component_name, component,
+	    sizeof(comp.component_name));
 
-	do_ioctl( fd, RAIDFRAME_REMOVE_HOT_SPARE, &hot_spare,
-	    "RAIDFRAME_REMOVE_HOT_SPARE");
+	do_ioctl( fd, RAIDFRAME_REMOVE_COMPONENT, &comp,
+	    "RAIDFRAME_REMOVE_COMPONENT");
 }
 
 static void

Index: src/sys/dev/raidframe/raidframeio.h
diff -u src/sys/dev/raidframe/raidframeio.h:1.11 src/sys/dev/raidframe/raidframeio.h:1.12
--- src/sys/dev/raidframe/raidframeio.h:1.11	Sat Aug 7 16:19:15 2021
+++ src/sys/dev/raidframe/raidframeio.h	Sun Sep 17 20:07:39 2023
@@ -1,4 +1,4 @@
-/* $NetBSD: raidframeio.h,v 1.11 2021/08/07 16:19:15 thorpej Exp $ */
+/* $NetBSD: raidframeio.h,v 1.12 2023/09/17 20:07:39 oster Exp $ */
 /*-
  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
  * All rights reserved.
@@ -110,7 +110,8 @@
 
 #define RAIDFRAME_INIT_LABELS _IOW ('r', 21, RF_ComponentLabel_t)
 #define RAIDFRAME_ADD_HOT_SPARE _IOW ('r', 22, RF_SingleComponent_t)
-#define RAIDFRAME_REMOVE_HOT_SPARE _IOW ('r', 23, RF_SingleComponent_t)
+#define RAIDFRAME_REMOVE_COMPONENT _IOW ('r', 23, RF_SingleComponent_t)
+#define RAIDFRAME_REMOVE_HOT_SPARE RAIDFRAME_REMOVE_COMPONENT
 #define RAIDFRAME_REBUILD_IN_PLACE _IOW ('r', 24, RF_SingleComponent_t)
 #define RAIDFRAME_CHECK_PARITY _IOWR ('r', 25, int)
 #define RAIDFRAME_CHECK_PARITYREWRITE_STATUS _IOWR ('r', 26, int)

Index: src/sys/dev/raidframe/rf_diskqueue.c
diff -u src/sys/dev/raidframe/rf_diskqueue.c:1.63 src/sys/dev/raidframe/rf_diskqueue.c:1.64
--- src/sys/dev/raidframe/rf_diskqueue.c:1.63	Tue Dec 14 00:46:43 2021
+++ src/sys/dev/raidframe/rf_diskqueue.c	Sun Sep 17 20:07:39 2023
@@ -1,4 +1,4 @@
-/* $NetBSD: rf_diskqueue.c,v 1.63 2021/12/14 00:46:43 mrg Exp $ */
+/* $NetBSD: rf_diskqueue.c,v 1.64 2023/09/17 20:07:39 oster Exp $ */
 /*
  * Copyright (c) 1995 Carnegie-Mellon University.
  * All rights reserved.
@@ -66,7 +66,7 @@
 ****************************************************************************/
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rf_diskqueue.c,v 1.63 2021/12/14 00:46:43 mrg Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rf_diskqueue.c,v 1.64 2023/09/17 20:07:39 oster Exp $");
 
 #include <dev/raidframe/raidframevar.h>
 
@@ -187,6 +187,13 @@ rf_ConfigureDiskQueue(RF_Raid_t *raidPtr
	return (0);
 }
 
+int
+rf_UpdateDiskQueue(RF_DiskQueue_t *diskqueue, RF_RaidDisk_t *disk)
+{
+	diskqueue->dev = disk->dev;
+	return(0);
+}
+
 static void
 rf_ShutdownDiskQueueSystem(void *arg)
@@ -255,7 +262,7 @@ rf_ConfigureDiskQueues(RF_ShutdownList_t
	}
 
	spareQueues = &raidPtr->Queues[raidPtr->numCol];
-	for (r = 0; r < raidPtr->numSpare; r++) {
+	for (r = 0; r < raidPtr->maxQueue; r++) {
		rc = rf_ConfigureDiskQueue(raidPtr, &spareQueues[r],
					   raidPtr->numCol + r, p,
					   raidPtr->sectorsPerDisk,

Index: src/sys/dev/raidframe/rf_diskqueue.h
diff -u src/sys/dev/raidframe/rf_diskqueue.h:1.29 src/sys/dev/raidframe/rf_diskqueue.h:1.30
--- src/sys/dev/raidframe/rf_diskqueue.h:1.29	Tue Jul 27 03:01:48 2021
+++ src/sys/dev/raidframe/rf_diskqueue.h	Sun Sep 17 20:07:39 2023
@@ -1,4 +1,4 @@
-/* $NetBSD: rf_diskqueue.h,v 1.29 2021/07/27 03:01:48 oster Exp $ */
+/* $NetBSD: rf_diskqueue.h,v 1.30 2023/09/17 20:07:39 oster Exp $ */
 /*
  * Copyright (c) 1995 Carnegie-Mellon University.
  * All rights reserved.
@@ -149,5 +149,6 @@ int rf_ConfigureDiskQueue(RF_Raid_t *, R
			  RF_SectorCount_t, dev_t, int,
			  RF_ShutdownList_t **,
			  RF_AllocListElem_t *);
+int rf_UpdateDiskQueue(RF_DiskQueue_t *, RF_RaidDisk_t *);
 
 #endif				/* !_RF__RF_DISKQUEUE_H_ */

Index: src/sys/dev/raidframe/rf_disks.c
diff -u src/sys/dev/raidframe/rf_disks.c:1.93 src/sys/dev/raidframe/rf_disks.c:1.94
--- src/sys/dev/raidframe/rf_disks.c:1.93	Wed Aug 10 01:16:38 2022
+++ src/sys/dev/raidframe/rf_disks.c	Sun Sep 17 20:07:39 2023
@@ -1,4 +1,4 @@
-/* $NetBSD: rf_disks.c,v 1.93 2022/08/10 01:16:38 mrg Exp $ */
+/* $NetBSD: rf_disks.c,v 1.94 2023/09/17 20:07:39 oster Exp $ */
 /*-
  * Copyright (c) 1999 The NetBSD Foundation, Inc.
  * All rights reserved.
@@ -60,12 +60,13 @@
 ***************************************************************/
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.93 2022/08/10 01:16:38 mrg Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rf_disks.c,v 1.94 2023/09/17 20:07:39 oster Exp $");
 
 #include <dev/raidframe/raidframevar.h>
 
 #include "rf_raid.h"
 #include "rf_alloclist.h"
+#include "rf_driver.h"
 #include "rf_utils.h"
 #include "rf_general.h"
 #include "rf_options.h"
@@ -337,6 +338,14 @@ rf_AllocDiskStructures(RF_Raid_t *raidPt
		goto fail;
	}
 
+	raidPtr->abortRecon = RF_MallocAndAdd(
+	    entries * sizeof(int), raidPtr->cleanupList);
+	if (raidPtr->abortRecon == NULL) {
+		ret = ENOMEM;
+		goto fail;
+	}
+
+
	return(0);
 fail:
	rf_UnconfigureVnodes( raidPtr );
@@ -977,8 +986,8 @@ rf_CheckLabels(RF_Raid_t *raidPtr, RF_Co
 int
 rf_add_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
 {
-	RF_RaidDisk_t *disks;
	RF_DiskQueue_t *spareQueues;
+	RF_RaidDisk_t *disks;
	int ret;
	unsigned int bs;
	int spare_number;
@@ -991,10 +1000,10 @@ rf_add_hot_spare(RF_Raid_t *raidPtr, RF_
	}
 
	rf_lock_mutex2(raidPtr->mutex);
-	while (raidPtr->adding_hot_spare == 1) {
-		rf_wait_cond2(raidPtr->adding_hot_spare_cv, raidPtr->mutex);
+	while (raidPtr->changing_components == 1) {
+		rf_wait_cond2(raidPtr->changing_components_cv, raidPtr->mutex);
	}
-	raidPtr->adding_hot_spare = 1;
+	raidPtr->changing_components = 1;
	rf_unlock_mutex2(raidPtr->mutex);
 
	/* the beginning of the spares... */
@@ -1054,25 +1063,43 @@ rf_add_hot_spare(RF_Raid_t *raidPtr, RF_
		}
	}
 
-	spareQueues = &raidPtr->Queues[raidPtr->numCol];
-	ret = rf_ConfigureDiskQueue( raidPtr, &spareQueues[spare_number],
-				     raidPtr->numCol + spare_number,
-				     raidPtr->qType,
-				     raidPtr->sectorsPerDisk,
-				     raidPtr->Disks[raidPtr->numCol +
						    spare_number].dev,
-				     raidPtr->maxOutstanding,
-				     &raidPtr->shutdownList,
-				     raidPtr->cleanupList);
+	/*
+	 * We only grow one initialized diskQueue at a time
+	 * spare_number can be lower than raidPtr->maxQueue (update)
+	 * or they can be equal (initialize new queue)
+	 */
+	RF_ASSERT(spare_number <= raidPtr->maxQueue);
 
-	rf_lock_mutex2(raidPtr->mutex);
-	raidPtr->numSpare++;
-	rf_unlock_mutex2(raidPtr->mutex);
+	spareQueues = &raidPtr->Queues[raidPtr->numCol];
+	if (spare_number == raidPtr->maxQueue) {
+		ret = rf_ConfigureDiskQueue(raidPtr, &spareQueues[spare_number],
+					    raidPtr->numCol + spare_number,
+					    raidPtr->qType,
+					    raidPtr->sectorsPerDisk,
+					    raidPtr->Disks[raidPtr->numCol +
							   spare_number].dev,
+					    raidPtr->maxOutstanding,
+					    &raidPtr->shutdownList,
+					    raidPtr->cleanupList);
+		if (ret)
+			goto fail;
+		rf_lock_mutex2(raidPtr->mutex);
+		raidPtr->maxQueue++;
+		rf_unlock_mutex2(raidPtr->mutex);
+	} else {
+		(void)rf_UpdateDiskQueue(&spareQueues[spare_number],
+		    &disks[spare_number]);
+	}
 
 fail:
	rf_lock_mutex2(raidPtr->mutex);
-	raidPtr->adding_hot_spare = 0;
-	rf_signal_cond2(raidPtr->adding_hot_spare_cv);
+
+	if (ret == 0) {
+		raidPtr->numSpare++;
+	}
+
+	raidPtr->changing_components = 0;
+	rf_signal_cond2(raidPtr->changing_components_cv);
	rf_unlock_mutex2(raidPtr->mutex);
 
	return(ret);
@@ -1081,56 +1108,140 @@
 int
 rf_remove_hot_spare(RF_Raid_t *raidPtr, RF_SingleComponent_t *sparePtr)
 {
-#if 0
	int spare_number;
-#endif
+	int i;
+	RF_RaidDisk_t *disk;
+	struct vnode *vp;
+	int ret = EINVAL;
 
-	if (raidPtr->numSpare==0) {
-		printf("No spares to remove!\n");
-		return(EINVAL);
+	spare_number = sparePtr->column - raidPtr->numCol;
+	if (spare_number < 0 || spare_number > raidPtr->numSpare)
+		return(ret);
+
+	rf_lock_mutex2(raidPtr->mutex);
+	while (raidPtr->changing_components == 1) {
+		rf_wait_cond2(raidPtr->changing_components_cv, raidPtr->mutex);
	}
+	raidPtr->changing_components = 1;
+	rf_unlock_mutex2(raidPtr->mutex);
 
-	return(EINVAL); /* XXX not implemented yet */
-#if 0
-	spare_number = sparePtr->column;
+	rf_SuspendNewRequestsAndWait(raidPtr);
 
-	if (spare_number < 0 || spare_number > raidPtr->numSpare) {
-		return(EINVAL);
-	}
+	disk = &raidPtr->Disks[raidPtr->numCol + spare_number];
+	if (disk->status != rf_ds_spare &&
+	    disk->status != rf_ds_failed) {
+		printf("Spare is in use %d\n", disk->status);
+		ret = EBUSY;
+		goto out;
+	}
+
+	vp = raidPtr->raid_cinfo[raidPtr->numCol + spare_number].ci_vp;
+	raidPtr->raid_cinfo[raidPtr->numCol + spare_number].ci_vp = NULL;
+	raidPtr->raid_cinfo[raidPtr->numCol + spare_number].ci_dev = 0;
+
+	/* This component was not automatically configured */
+	disk->auto_configured = 0;
+	disk->dev = 0;
+	disk->numBlocks = 0;
+	disk->status = rf_ds_failed;
+	snprintf(disk->devname, sizeof(disk->devname),
+	    "absent_spare%d", spare_number);
+	rf_close_component(raidPtr, vp, 0);
 
-	/* verify that this spare isn't in use... */
+	rf_lock_mutex2(raidPtr->mutex);
+	/* at this point we know spare_number is to be pushed all the way to the end of the array... */
+	for (i = raidPtr->numCol + spare_number; i < raidPtr->numCol+raidPtr->numSpare-1; i++) {
+		/* now we work our way up the spare array, swaping the current one for the next one */
+		rf_swap_components(raidPtr, i, i+1);
+	}
+
+	raidPtr->numSpare--;
+	rf_unlock_mutex2(raidPtr->mutex);
+	rf_ResumeNewRequests(raidPtr);
 
-	/* it's gone.. */
+	ret = 0;
 
-	raidPtr->numSpare--;
+out:
 
-	return(0);
-#endif
-}
+	rf_lock_mutex2(raidPtr->mutex);
+	raidPtr->changing_components = 0;
+	rf_signal_cond2(raidPtr->changing_components_cv);
+	rf_unlock_mutex2(raidPtr->mutex);
+	return(ret);
+}
+/*
+ * Delete a non hot spare component
+ */
 int
 rf_delete_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
 {
-#if 0
-	RF_RaidDisk_t *disks;
-#endif
+	RF_RaidDisk_t *disk;
+	RF_RowCol_t col = component->column;
+	struct vnode *vp;
+	int ret = EINVAL;
 
-	if ((component->column < 0) ||
-	    (component->column >= raidPtr->numCol)) {
-		return(EINVAL);
+	if (col < 0 || col >= raidPtr->numCol)
+		return(ret);
+
+	rf_lock_mutex2(raidPtr->mutex);
+	while (raidPtr->changing_components == 1) {
+		rf_wait_cond2(raidPtr->changing_components_cv, raidPtr->mutex);
	}
+	raidPtr->changing_components = 1;
+	rf_unlock_mutex2(raidPtr->mutex);
 
-#if 0
-	disks = &raidPtr->Disks[component->column];
-#endif
+	disk = &raidPtr->Disks[col];
 
-	/* 1. This component must be marked as 'failed' */
+	/* 1. This component must be marked as failed or spared */
+	switch (disk->status) {
+	case rf_ds_failed:
+	case rf_ds_dist_spared:
+	case rf_ds_spared:
+		break;
+	default:
+		ret = EBUSY;
+		goto out;
+	}
 
-	return(EINVAL); /* Not implemented yet. */
+	vp = raidPtr->raid_cinfo[col].ci_vp;
+	raidPtr->raid_cinfo[col].ci_vp = NULL;
+	raidPtr->raid_cinfo[col].ci_dev = 0;
+
+	/* This component was not automatically configured */
+	disk->auto_configured = 0;
+	disk->dev = 0;
+	disk->numBlocks = 0;
+	snprintf(disk->devname, sizeof(disk->devname), "component%d", col);
+
+	rf_close_component(raidPtr, vp, 0);
+
+	ret = 0;
+out:
+	rf_lock_mutex2(raidPtr->mutex);
+	raidPtr->changing_components = 0;
+	rf_signal_cond2(raidPtr->changing_components_cv);
+	rf_unlock_mutex2(raidPtr->mutex);
+
+	return(ret);
+}
+
+int
+rf_remove_component(RF_Raid_t *raidPtr, RF_SingleComponent_t *component)
+{
+	RF_RowCol_t col = component->column;
+
+	if (col < 0 || col >= raidPtr->numCol + raidPtr->numSpare)
+		return(EINVAL);
+
+	if (col >= raidPtr->numCol)
+		return rf_remove_hot_spare(raidPtr, component);
+	else
+		return rf_delete_component(raidPtr, component);
 }
 
 int
@@ -1143,3 +1254,48 @@ rf_incorporate_hot_spare(RF_Raid_t *raid
 
	return(EINVAL); /* Not implemented yet. */
 }
+
+void
+rf_swap_components(RF_Raid_t *raidPtr, int a, int b)
+{
+	char tmpdevname[56]; /* 56 is from raidframevar.h */
+	RF_ComponentLabel_t tmp_ci_label;
+	dev_t tmp_ci_dev, tmp_dev;
+	int tmp_status;
+	struct vnode *tmp_ci_vp;
+
+
+	/* This function *MUST* be called with all IO suspended. */
+	RF_ASSERT(raidPtr->accesses_suspended == 0);
+
+	/* Swap the component names... */
+	snprintf(tmpdevname, sizeof(tmpdevname),raidPtr->Disks[a].devname);
+	snprintf(raidPtr->Disks[a].devname, sizeof(raidPtr->Disks[a].devname), raidPtr->Disks[b].devname);
+	snprintf(raidPtr->Disks[b].devname, sizeof(raidPtr->Disks[b].devname), tmpdevname);
+
+	/* and the vp */
+	tmp_ci_vp = raidPtr->raid_cinfo[a].ci_vp;
+	raidPtr->raid_cinfo[a].ci_vp = raidPtr->raid_cinfo[b].ci_vp;
+	raidPtr->raid_cinfo[b].ci_vp = tmp_ci_vp;
+
+	/* and the ci dev */
+	tmp_ci_dev = raidPtr->raid_cinfo[a].ci_dev;
+	raidPtr->raid_cinfo[a].ci_dev = raidPtr->raid_cinfo[b].ci_dev;
+	raidPtr->raid_cinfo[b].ci_dev = tmp_ci_dev;
+
+	/* the dev itself */
+	tmp_dev = raidPtr->Disks[a].dev;
+	raidPtr->Disks[a].dev = raidPtr->Disks[b].dev;
+	raidPtr->Disks[b].dev = tmp_dev;
+
+	/* the component label */
+	tmp_ci_label = raidPtr->raid_cinfo[a].ci_label;
+	raidPtr->raid_cinfo[a].ci_label = raidPtr->raid_cinfo[b].ci_label;
+	raidPtr->raid_cinfo[b].ci_label = tmp_ci_label;
+
+	/* and the status */
+	tmp_status = raidPtr->Disks[a].status;
+	raidPtr->Disks[a].status = raidPtr->Disks[b].status;
+	raidPtr->Disks[b].status = tmp_status;
+}
+

Index: src/sys/dev/raidframe/rf_disks.h
diff -u src/sys/dev/raidframe/rf_disks.h:1.14 src/sys/dev/raidframe/rf_disks.h:1.15
--- src/sys/dev/raidframe/rf_disks.h:1.14	Sun Dec 11 12:23:37 2005
+++ src/sys/dev/raidframe/rf_disks.h	Sun Sep 17 20:07:39 2023
@@ -1,4 +1,4 @@
-/* $NetBSD: rf_disks.h,v 1.14 2005/12/11 12:23:37 christos Exp $ */
+/* $NetBSD: rf_disks.h,v 1.15 2023/09/17 20:07:39 oster Exp $ */
 /*
  * Copyright (c) 1995 Carnegie-Mellon University.
  * All rights reserved.
@@ -52,6 +52,8 @@ int rf_CheckLabels(RF_Raid_t *, RF_Confi
 int rf_add_hot_spare(RF_Raid_t *, RF_SingleComponent_t *);
 int rf_remove_hot_spare(RF_Raid_t *, RF_SingleComponent_t *);
 int rf_delete_component(RF_Raid_t *r, RF_SingleComponent_t *);
+int rf_remove_component(RF_Raid_t *r, RF_SingleComponent_t *);
 int rf_incorporate_hot_spare(RF_Raid_t *, RF_SingleComponent_t *);
+void rf_swap_components(RF_Raid_t *, int, int);
 
 #endif				/* !_RF__RF_DISKS_H_ */

Index: src/sys/dev/raidframe/rf_driver.c
diff -u src/sys/dev/raidframe/rf_driver.c:1.140 src/sys/dev/raidframe/rf_driver.c:1.141
--- src/sys/dev/raidframe/rf_driver.c:1.140	Wed Aug 10 01:16:38 2022
+++ src/sys/dev/raidframe/rf_driver.c	Sun Sep 17 20:07:39 2023
@@ -1,4 +1,4 @@
-/* $NetBSD: rf_driver.c,v 1.140 2022/08/10 01:16:38 mrg Exp $ */
+/* $NetBSD: rf_driver.c,v 1.141 2023/09/17 20:07:39 oster Exp $ */
 /*-
  * Copyright (c) 1999 The NetBSD Foundation, Inc.
  * All rights reserved.
@@ -66,7 +66,7 @@
 
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rf_driver.c,v 1.140 2022/08/10 01:16:38 mrg Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rf_driver.c,v 1.141 2023/09/17 20:07:39 oster Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_raid_diagnostic.h"
@@ -348,6 +348,7 @@ rf_Configure(RF_Raid_t *raidPtr, RF_Conf
 
	raidPtr->numCol = cfgPtr->numCol;
	raidPtr->numSpare = cfgPtr->numSpare;
+	raidPtr->maxQueue = cfgPtr->numSpare;
	raidPtr->status = rf_rs_optimal;
	raidPtr->reconControl = NULL;
 
@@ -401,7 +402,7 @@ rf_Configure(RF_Raid_t *raidPtr, RF_Conf
	raidPtr->numNewFailures = 0;
	raidPtr->copyback_in_progress = 0;
	raidPtr->parity_rewrite_in_progress = 0;
-	raidPtr->adding_hot_spare = 0;
+	raidPtr->changing_components = 0;
	raidPtr->recon_in_progress = 0;
 
	raidPtr->maxOutstanding = cfgPtr->maxOutstandingDiskReqs;
@@ -951,7 +952,7 @@ rf_alloc_mutex_cond(RF_Raid_t *raidPtr)
 
	rf_init_cond2(raidPtr->waitForReconCond, "rfrcnw");
 
-	rf_init_cond2(raidPtr->adding_hot_spare_cv, "raidhs");
+	rf_init_cond2(raidPtr->changing_components_cv, "raidhs");
 }
 
 static void
@@ -959,7 +960,7 @@ rf_destroy_mutex_cond(RF_Raid_t *raidPtr
 
	rf_destroy_cond2(raidPtr->waitForReconCond);
 
-	rf_destroy_cond2(raidPtr->adding_hot_spare_cv);
+	rf_destroy_cond2(raidPtr->changing_components_cv);
 
	rf_destroy_mutex2(raidPtr->access_suspend_mutex);
	rf_destroy_cond2(raidPtr->access_suspend_cv);

Index: src/sys/dev/raidframe/rf_netbsdkintf.c
diff -u src/sys/dev/raidframe/rf_netbsdkintf.c:1.413 src/sys/dev/raidframe/rf_netbsdkintf.c:1.414
--- src/sys/dev/raidframe/rf_netbsdkintf.c:1.413	Sat Sep 16 23:38:57 2023
+++ src/sys/dev/raidframe/rf_netbsdkintf.c	Sun Sep 17 20:07:39 2023
@@ -1,4 +1,4 @@
-/* $NetBSD: rf_netbsdkintf.c,v 1.413 2023/09/16 23:38:57 oster Exp $ */
+/* $NetBSD: rf_netbsdkintf.c,v 1.414 2023/09/17 20:07:39 oster Exp $ */
 
 /*-
  * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
@@ -101,7 +101,7 @@
 ***********************************************************/
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.413 2023/09/16 23:38:57 oster Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.414 2023/09/17 20:07:39 oster Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_raid_autoconfig.h"
@@ -774,6 +774,7 @@ raid_dumpblocks(device_t dev, void *va, 
 
	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
+
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			scol = -1;
@@ -1112,9 +1113,7 @@ rf_fail_disk(RF_Raid_t *raidPtr, struct 
 
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->status == rf_rs_reconstructing) {
-		/* you can't fail a disk while we're reconstructing! */
-		/* XXX wrong for RAID6 */
-		goto out;
+		raidPtr->abortRecon[rr->col] = 1;
	}
	if ((raidPtr->Disks[rr->col].status == rf_ds_optimal) &&
	    (raidPtr->numFailures > 0)) {
@@ -1123,8 +1122,18 @@ rf_fail_disk(RF_Raid_t *raidPtr, struct 
		goto out;
	}
	if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
-		/* Can't fail a spared disk! */
-		goto out;
+		int spareCol = raidPtr->Disks[rr->col].spareCol;
+
+		if (spareCol < raidPtr->numCol ||
+		    spareCol >= raidPtr->numCol + raidPtr->numSpare)
+			goto out;
+
+		/*
+		 * Fail the spare disk so that we can
+		 * reconstruct on another one.
+		 */
+		raidPtr->Disks[spareCol].status = rf_ds_failed;
+
	}
	rf_unlock_mutex2(raidPtr->mutex);
@@ -1560,13 +1569,15 @@ raidioctl(dev_t dev, u_long cmd, void *d
		rf_copy_single_component(&component, data);
		return rf_add_hot_spare(raidPtr, &component);
 
-	case RAIDFRAME_REMOVE_HOT_SPARE:
-		return retcode;
-
+	/* Remove a non hot-spare component, never implemented in userland */
	case RAIDFRAME_DELETE_COMPONENT:
		rf_copy_single_component(&component, data);
		return rf_delete_component(raidPtr, &component);
 
+	case RAIDFRAME_REMOVE_COMPONENT:
+		rf_copy_single_component(&component, data);
+		return rf_remove_component(raidPtr, &component);
+
	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		rf_copy_single_component(&component, data);
		return rf_incorporate_hot_spare(raidPtr, &component);
@@ -2556,8 +2567,9 @@ rf_markalldirty(RF_Raid_t *raidPtr)
		}
	}
 
-	for( c = 0; c < raidPtr->numSpare ; c++) {
+	for (c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
+
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*
@@ -2629,8 +2641,9 @@ rf_update_component_labels(RF_Raid_t *ra
		/* else we don't touch it.. */
	}
 
-	for( c = 0; c < raidPtr->numSpare ; c++) {
+	for (c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
+
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*
@@ -3459,6 +3472,7 @@ rf_set_autoconfig(RF_Raid_t *raidPtr, in
	}
	for(column = 0; column < raidPtr->numSpare ; column++) {
		sparecol = raidPtr->numCol + column;
+
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			clabel = raidget_component_label(raidPtr, sparecol);
			clabel->autoconfigure = new_value;
@@ -3483,8 +3497,9 @@ rf_set_rootpartition(RF_Raid_t *raidPtr,
			raidflush_component_label(raidPtr, column);
		}
	}
-	for(column = 0; column < raidPtr->numSpare ; column++) {
+	for (column = 0; column < raidPtr->numSpare ; column++) {
		sparecol = raidPtr->numCol + column;
+
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			clabel = raidget_component_label(raidPtr, sparecol);
			clabel->root_partition = new_value;
@@ -3833,6 +3848,7 @@ rf_sync_component_caches(RF_Raid_t *raid
 
	for (c = 0; c < raidPtr->numSpare ; c++) {
		int sparecol = raidPtr->numCol + c;
+
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			int e = rf_sync_component_cache(raidPtr, sparecol,
@@ -3917,12 +3933,12 @@ rf_get_info(RF_Raid_t *raidPtr, RF_Devic
			config->devs[d] = raidPtr->Disks[j];
			d++;
		}
-	for (j = config->cols, i = 0; i < config->nspares; i++, j++) {
-		config->spares[i] = raidPtr->Disks[j];
-		if (config->spares[i].status == rf_ds_rebuilding_spare) {
-			/* XXX: raidctl(8) expects to see this as a used spare */
-			config->spares[i].status = rf_ds_used_spare;
-		}
+	for (i = 0; i < config->nspares; i++) {
+		config->spares[i] = raidPtr->Disks[raidPtr->numCol + i];
+		if (config->spares[i].status == rf_ds_rebuilding_spare) {
+			/* raidctl(8) expects to see this as a used spare */
+			config->spares[i].status = rf_ds_used_spare;
+		}
	}
	return 0;
 }

Index: src/sys/dev/raidframe/rf_raid.h
diff -u src/sys/dev/raidframe/rf_raid.h:1.51 src/sys/dev/raidframe/rf_raid.h:1.52
--- src/sys/dev/raidframe/rf_raid.h:1.51	Sat Aug 7 16:19:15 2021
+++ src/sys/dev/raidframe/rf_raid.h	Sun Sep 17 20:07:39 2023
@@ -1,4 +1,4 @@
-/* $NetBSD: rf_raid.h,v 1.51 2021/08/07 16:19:15 thorpej Exp $ */
+/* $NetBSD: rf_raid.h,v 1.52 2023/09/17 20:07:39 oster Exp $ */
 /*
  * Copyright (c) 1995 Carnegie-Mellon University.
  * All rights reserved.
@@ -116,7 +116,8 @@ struct RF_Raid_s {
	RF_RaidLayout_t Layout; /* all information related to layout */
	RF_RaidDisk_t *Disks;	/* all information related to physical disks */
-	RF_DiskQueue_t *Queues;/* all information related to disk queues */
+	RF_DiskQueue_t *Queues;	/* all information related to disk queues */
+	u_int maxQueue;		/* initialized queues in Queues array */
	const RF_DiskQueueSW_t *qType;/* pointer to the DiskQueueSW used for the
				   component queues. */
	/* NOTE: This is an anchor point via which the queues can be
@@ -213,10 +214,10 @@ struct RF_Raid_s {
	int recon_in_progress;
	int parity_rewrite_in_progress;
	int copyback_in_progress;
-	int adding_hot_spare;
+	int changing_components;
 
	rf_declare_cond2(parity_rewrite_cv);
-	rf_declare_cond2(adding_hot_spare_cv);
+	rf_declare_cond2(changing_components_cv);
 
	/*
	 * Engine thread control
@@ -259,6 +260,8 @@ struct RF_Raid_s {
	int waitShutdown;
	int nAccOutstanding;
 
+	int *abortRecon; /* Abort background operations requested */
+
	RF_DiskId_t **diskids;
 
	int raidid;

Index: src/sys/dev/raidframe/rf_reconstruct.c
diff -u src/sys/dev/raidframe/rf_reconstruct.c:1.128 src/sys/dev/raidframe/rf_reconstruct.c:1.129
--- src/sys/dev/raidframe/rf_reconstruct.c:1.128	Fri Sep 8 22:55:32 2023
+++ src/sys/dev/raidframe/rf_reconstruct.c	Sun Sep 17 20:07:39 2023
@@ -1,4 +1,4 @@
-/* $NetBSD: rf_reconstruct.c,v 1.128 2023/09/08 22:55:32 oster Exp $ */
+/* $NetBSD: rf_reconstruct.c,v 1.129 2023/09/17 20:07:39 oster Exp $ */
 /*
  * Copyright (c) 1995 Carnegie-Mellon University.
  * All rights reserved.
@@ -33,7 +33,7 @@
 ************************************************************/
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rf_reconstruct.c,v 1.128 2023/09/08 22:55:32 oster Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rf_reconstruct.c,v 1.129 2023/09/17 20:07:39 oster Exp $");
 
 #include <sys/param.h>
 #include <sys/time.h>
@@ -295,9 +295,10 @@ rf_ReconstructFailedDiskBasic(RF_Raid_t 
	rc = rf_ContinueReconstructFailedDisk(reconDesc);
 
	if (!rc) {
-		/* fix up the component label */
-		/* Don't actually need the read here.. */
-		c_label = raidget_component_label(raidPtr, scol);
+		/* fix up the component label. Note that at this point col and scol have swapped places. */
+		/* We need to read from the *spared* disk, but use that label for the real component */
+
+		c_label = raidget_component_label(raidPtr, col);
		raid_init_component_label(raidPtr, c_label);
 
		c_label->row = 0;
@@ -305,7 +306,7 @@ rf_ReconstructFailedDiskBasic(RF_Raid_t 
		c_label->clean = RF_RAID_DIRTY;
		c_label->status = rf_ds_optimal;
		rf_component_label_set_partitionsize(c_label,
-		    raidPtr->Disks[scol].partitionSize);
+		    raidPtr->Disks[col].partitionSize);
 
		/* We've just done a rebuild based on all the other
		   disks, so at this point the parity is known to be
@@ -320,13 +321,11 @@ rf_ReconstructFailedDiskBasic(RF_Raid_t 
		   so we just update the spare disk as being
		   a used spare
		*/
-		spareDiskPtr->status = rf_ds_used_spare;
 
		raidPtr->parity_good = RF_RAID_CLEAN;
		rf_unlock_mutex2(raidPtr->mutex);
 
		/* XXXX MORE NEEDED HERE */
-
-		raidflush_component_label(raidPtr, scol);
+		raidflush_component_label(raidPtr, col);
	} else {
		/* Reconstruct failed. */
@@ -522,7 +521,6 @@ rf_ReconstructInPlace(RF_Raid_t *raidPtr
	rf_unlock_mutex2(raidPtr->mutex);
 
	/* fix up the component label */
-	/* Don't actually need the read here.. */
	c_label = raidget_component_label(raidPtr, col);
 
	rf_lock_mutex2(raidPtr->mutex);
@@ -643,9 +641,15 @@ rf_ContinueReconstructFailedDisk(RF_Raid
	done = 0;
	while (!done) {
 
-		if (raidPtr->waitShutdown) {
-			/* someone is unconfiguring this array... bail on the reconstruct.. */
+		if (raidPtr->waitShutdown ||
+		    raidPtr->abortRecon[col]) {
+			/*
+			 * someone is unconfiguring this array
+			 * or failed a component
+			 *... bail on the reconstruct..
+			 */
			recon_error = 1;
+			raidPtr->abortRecon[col] = 0;
			break;
		}
 
@@ -898,6 +902,7 @@ rf_ContinueReconstructFailedDisk(RF_Raid
	rf_DrainReconEventQueue(reconDesc);
 
	rf_FreeReconControl(raidPtr);
+
 #if RF_ACC_TRACE > 0
	RF_Free(raidPtr->recon_tracerecs, raidPtr->numCol * sizeof(RF_AccTraceEntry_t));
 #endif
@@ -918,6 +923,21 @@ rf_ContinueReconstructFailedDisk(RF_Raid
	ds = (raidPtr->Layout.map->flags & RF_DISTRIBUTE_SPARE);
	raidPtr->Disks[col].status = (ds) ? rf_ds_dist_spared : rf_ds_spared;
	raidPtr->status = (ds) ? rf_rs_reconfigured : rf_rs_optimal;
+
+	if (col != scol) {
+		/* swap the names, raid_cinfo. queues stay where they are. */
+		rf_swap_components(raidPtr, col, scol);
+
+		/* mark the new spare as good */
+		raidPtr->Disks[col].status = rf_ds_optimal;
+
+		for (i = scol; i < raidPtr->numCol+raidPtr->numSpare-1; i++) {
+			/* now we work our way up the array, swapping as we go. */
+			/* swap with the one at the next position, which must be there */
+			rf_swap_components(raidPtr, i, i+1);
+		}
+		raidPtr->numSpare--;
+	}
	rf_unlock_mutex2(raidPtr->mutex);
	RF_GETTIME(etime);
	RF_TIMEVAL_DIFF(&(raidPtr->reconControl->starttime), &etime, &elpsd);
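A design note on the abort path in the hunks above: rf_fail_disk() no
longer rejects a failure request while a reconstruction is running; it
raises the per-column abortRecon flag, and the reconstruction loop polls
that flag alongside waitShutdown.  The following condensed, runnable model
of that handshake uses illustrative stand-ins (abort_recon, fail_disk,
recon_step are not the kernel names), and elides the raidPtr->mutex
locking the real code performs around these state changes:

    #include <stdio.h>

    #define NUMCOL 3

    /* illustrative model of the per-column flags (raidPtr->abortRecon) */
    static int abort_recon[NUMCOL];
    static int recon_error;

    /* what rf_fail_disk() now does when status == rf_rs_reconstructing */
    static void
    fail_disk(int col)
    {
    	abort_recon[col] = 1;	/* ask the rebuild of this column to stop */
    }

    /* the check at the top of the rf_ContinueReconstructFailedDisk() loop */
    static int
    recon_step(int col, int wait_shutdown)
    {
    	if (wait_shutdown || abort_recon[col]) {
    		recon_error = 1;
    		abort_recon[col] = 0;	/* consume the abort request */
    		return 0;		/* bail on the reconstruct */
    	}
    	/* ... issue and reap reconstruction I/O here ... */
    	return 1;
    }

    int
    main(void)
    {
    	int col = 0;

    	fail_disk(col);		/* e.g. raidctl -f during a rebuild */
    	while (recon_step(col, 0))
    		;
    	printf("recon_error = %d\n", recon_error);
    	return 0;
    }

Clearing the flag after it is consumed is what lets a later rebuild of the
same column start cleanly.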