On Wed, Apr 02, 2025 at 06:42:34PM +0200, Martin Wilck wrote: > On Mon, 2025-03-31 at 19:17 -0400, Benjamin Marzinski wrote: > > When a new device is added by the multipath command, multipathd may > > know > > of other paths that cannot be added to the device because they are > > currently offline. Instead of ignoring these paths, multipathd will > > now > > re-add them when they come back online. To do this, it multipathd > > needs > > a new path variable add_when_online, to track devices that could not > > be > > added to an existing multipath device because they were offline. > > These > > paths are handled along with the other uninitialized paths. > > > > Signed-off-by: Benjamin Marzinski <bmarz...@redhat.com> > > --- > > libmultipath/print.c | 5 +++- > > libmultipath/structs.h | 1 + > > libmultipath/structs_vec.c | 5 ++++ > > multipathd/main.c | 59 > > ++++++++++++++++++++++++++++++++++++-- > > multipathd/multipathd.8.in | 5 ++-- > > 5 files changed, 70 insertions(+), 5 deletions(-) > > > > diff --git a/libmultipath/print.c b/libmultipath/print.c > > index 00c03ace..019ae56f 100644 > > --- a/libmultipath/print.c > > +++ b/libmultipath/print.c > > @@ -658,8 +658,11 @@ snprint_path_serial (struct strbuf *buff, const > > struct path * pp) > > static int > > snprint_path_mpp (struct strbuf *buff, const struct path * pp) > > { > > - if (!pp->mpp) > > + if (!pp->mpp) { > > + if (pp->add_when_online) > > + return append_strbuf_str(buff, "[offline]"); > > return append_strbuf_str(buff, "[orphan]"); > > + } > > if (!pp->mpp->alias) > > return append_strbuf_str(buff, "[unknown]"); > > return snprint_str(buff, pp->mpp->alias); > > diff --git a/libmultipath/structs.h b/libmultipath/structs.h > > index 28de9a7f..39d1c71c 100644 > > --- a/libmultipath/structs.h > > +++ b/libmultipath/structs.h > > @@ -413,6 +413,7 @@ struct path { > > int eh_deadline; > > enum check_path_states is_checked; > > bool can_use_env_uid; > > + bool add_when_online; > > unsigned int checker_timeout; > > /* configlet pointers */ > > vector hwe; > > diff --git a/libmultipath/structs_vec.c b/libmultipath/structs_vec.c > > index f6407e12..663c9053 100644 > > --- a/libmultipath/structs_vec.c > > +++ b/libmultipath/structs_vec.c > > @@ -389,6 +389,9 @@ static void orphan_paths(vector pathvec, struct > > multipath *mpp, const char *reas > > free_path(pp); > > } else > > orphan_path(pp, reason); > > + } else if (pp->add_when_online && > > + strncmp(mpp->wwid, pp->wwid, WWID_SIZE) > > == 0) { > > + pp->add_when_online = false; > > } > > } > > } > > @@ -595,6 +598,8 @@ void sync_paths(struct multipath *mpp, vector > > pathvec) > > found = 0; > > vector_foreach_slot(mpp->pg, pgp, j) { > > if (find_slot(pgp->paths, (void *)pp) != -1) > > { > > + if (pp->add_when_online) > > + pp->add_when_online = false; > > found = 1; > > break; > > } > > diff --git a/multipathd/main.c b/multipathd/main.c > > index 7aaae773..9aa5a2fa 100644 > > --- a/multipathd/main.c > > +++ b/multipathd/main.c > > @@ -644,11 +644,45 @@ pr_register_active_paths(struct multipath *mpp) > > } > > } > > > > +static void > > +save_offline_paths(const struct multipath *mpp, vector > > offline_paths) > > +{ > > + unsigned int i, j; > > + struct path *pp; > > + struct pathgroup *pgp; > > + > > + vector_foreach_slot (mpp->pg, pgp, i) > > + vector_foreach_slot (pgp->paths, pp, j) > > + if (pp->initialized == INIT_OK && > > + pp->sysfs_state == PATH_DOWN) > > + /* ignore failures storing the > > paths. */ > > + store_path(offline_paths, pp); > > +} > > + > > +static void > > +handle_orphaned_offline_paths(vector offline_paths) > > +{ > > + unsigned int i; > > + struct path *pp; > > + > > + vector_foreach_slot (offline_paths, pp, i) > > + if (pp->mpp == NULL) > > + pp->add_when_online = true; > > +} > > + > > +static void > > +cleanup_reset_vec(struct vector_s **v) > > +{ > > + vector_reset(*v); > > +} > > + > > static int > > update_map (struct multipath *mpp, struct vectors *vecs, int > > new_map) > > { > > int retries = 3; > > char *params __attribute__((cleanup(cleanup_charp))) = NULL; > > + struct vector_s offline_paths_vec = { .allocated = 0 }; > > + vector offline_paths > > __attribute__((cleanup(cleanup_reset_vec))) = &offline_paths_vec; > > > > retry: > > condlog(4, "%s: updating new map", mpp->alias); > > @@ -685,6 +719,9 @@ fail: > > return 1; > > } > > > > + if (new_map && retries < 0) > > + save_offline_paths(mpp, offline_paths); > > + > > if (setup_multipath(vecs, mpp)) > > return 1; > > > > @@ -695,6 +732,9 @@ fail: > > if (mpp->prflag == PRFLAG_SET) > > pr_register_active_paths(mpp); > > > > + if (VECTOR_SIZE(offline_paths) != 0) > > + handle_orphaned_offline_paths(offline_paths); > > + > > if (retries < 0) > > condlog(0, "%s: failed reload in new map update", > > mpp->alias); > > return 0; > > @@ -2793,7 +2833,8 @@ check_uninitialized_path(struct path * pp, > > unsigned int ticks) > > struct config *conf; > > > > if (pp->initialized != INIT_NEW && pp->initialized != > > INIT_FAILED && > > - pp->initialized != INIT_MISSING_UDEV) > > + pp->initialized != INIT_MISSING_UDEV && > > + !(pp->initialized == INIT_OK && pp->add_when_online)) > > return CHECK_PATH_SKIPPED; > > > > if (pp->tick) > > @@ -2849,7 +2890,8 @@ update_uninitialized_path(struct vectors * > > vecs, struct path * pp) > > struct config *conf; > > > > if (pp->initialized != INIT_NEW && pp->initialized != > > INIT_FAILED && > > - pp->initialized != INIT_MISSING_UDEV) > > + pp->initialized != INIT_MISSING_UDEV && > > + !(pp->initialized == INIT_OK && pp->add_when_online)) > > return CHECK_PATH_SKIPPED; > > > > newstate = get_new_state(pp); > > @@ -2875,6 +2917,19 @@ update_uninitialized_path(struct vectors * > > vecs, struct path * pp) > > free_path(pp); > > return CHECK_PATH_REMOVED; > > } > > + } else if (pp->initialized == INIT_OK && pp->add_when_online > > && > > + (newstate == PATH_UP || newstate == PATH_GHOST)) > > { > > + pp->initialized = INIT_OK; > > + if (pp->recheck_wwid == RECHECK_WWID_ON && > > I wonder if we should always check the WWID here. After all, this path > has never been part of the map, and it was offline when the map was > created (IOW, the WWID couldn't be checked at that point in time). > Can we rely on the stored WWID?
That make sense. I also noticed a typo in my documentation changes. I'll send a new patch. -Ben > Other than that, LGTM. > > Regards, > Martin > > > + check_path_wwid_change(pp)) { > > + condlog(0, "%s: path wwid change detected. > > Removing", > > + pp->dev); > > + return handle_path_wwid_change(pp, vecs)? > > + CHECK_PATH_REMOVED : > > + CHECK_PATH_SKIPPED; > > + } > > + ev_add_path(pp, vecs, 1); > > + pp->tick = 1; > > } > > return CHECK_PATH_CHECKED; > > } > > diff --git a/multipathd/multipathd.8.in b/multipathd/multipathd.8.in > > index 43f87bf8..a91acff1 100644 > > --- a/multipathd/multipathd.8.in > > +++ b/multipathd/multipathd.8.in > > @@ -595,8 +595,9 @@ The device serial number. > > The device marginal state, either \fImarginal\fR or \fInormal\fR. > > .TP > > .B %m > > -The multipath device that this device is a path of, or > > \fI[orphan]\fR if > > -it is not part of any multipath device. > > +The multipath device that this device is a path of, or > > \fI[offline]\fR > > +if this device could not be added to a device because is is offline > > or > > +\fI[orphan]\fR if it is not part of any multipath device. > > .TP > > .B %N > > The host World Wide Node Name (WWNN) of the device, if any.