On Mon, 2025-03-24 at 16:55 -0400, Benjamin Marzinski wrote:
> When a new device is added by the multipath command, multipathd may
> know
> of other paths that cannot be added to the device because they are
> currently offline. Instead of ignoring these paths, multipathd will
> now
> re-add them when they come back online. To do this, it multipathd
> needs
> a new device initialized state, INIT_OFFLINE, to track devices that
> were
> in INIT_OK, but could not be added to an existing multipath device
> because they were offline. These paths are handled along with the
> other
> uninitialized paths.
>
> Signed-off-by: Benjamin Marzinski <[email protected]>
This looks good in general, but I'm not certain about INIT_OFFLINE. The
property "path was temporarily offline while we tried to add it to a
map" is not logically related to path initialization, IMO. Also, we
have plenty of (pp->initialized != INIT_xyz) conditionals in the code,
and your patch touches only 2 of them. I find it difficult to rule out
that some of them might be broken by the additional init state. Have
you double-checked them all?
Maybe it would be better to store this property in a separate flag in
struct path?
2 more remarks below.
Thanks
Martin
> ---
> libmultipath/print.c | 1 +
> libmultipath/structs.h | 5 ++++
> libmultipath/structs_vec.c | 5 ++++
> multipathd/main.c | 58
> ++++++++++++++++++++++++++++++++++++--
> 4 files changed, 67 insertions(+), 2 deletions(-)
>
> diff --git a/libmultipath/print.c b/libmultipath/print.c
> index 00c03ace..ed8adebe 100644
> --- a/libmultipath/print.c
> +++ b/libmultipath/print.c
> @@ -572,6 +572,7 @@ static int snprint_initialized(struct strbuf
> *buff, const struct path * pp)
> [INIT_OK] = "ok",
> [INIT_REMOVED] = "removed",
> [INIT_PARTIAL] = "partial",
> + [INIT_OFFLINE] = "offline",
> };
> const char *str;
>
> diff --git a/libmultipath/structs.h b/libmultipath/structs.h
> index 28de9a7f..8644407f 100644
> --- a/libmultipath/structs.h
> +++ b/libmultipath/structs.h
> @@ -258,6 +258,11 @@ enum initialized_states {
> * change uevent is received.
> */
> INIT_PARTIAL,
> + /*
> + * INIT_OFFLINE: paths that should be part of an existing
> multipath
> + * device, but cannot be added because they are offline,
> + */
> + INIT_OFFLINE,
> INIT_LAST__,
> };
>
> diff --git a/libmultipath/structs_vec.c b/libmultipath/structs_vec.c
> index f6407e12..f122d056 100644
> --- a/libmultipath/structs_vec.c
> +++ b/libmultipath/structs_vec.c
> @@ -389,6 +389,9 @@ static void orphan_paths(vector pathvec, struct
> multipath *mpp, const char *reas
> free_path(pp);
> } else
> orphan_path(pp, reason);
> + } else if (pp->initialized == INIT_OFFLINE &&
> + strncmp(mpp->wwid, pp->wwid, WWID_SIZE)
> == 0) {
> + pp->initialized = INIT_OK;
> }
> }
> }
> @@ -595,6 +598,8 @@ void sync_paths(struct multipath *mpp, vector
> pathvec)
> found = 0;
> vector_foreach_slot(mpp->pg, pgp, j) {
> if (find_slot(pgp->paths, (void *)pp) != -1)
> {
> + if (pp->initialized == INIT_OFFLINE)
> + pp->initialized = INIT_OK;
> found = 1;
> break;
> }
> diff --git a/multipathd/main.c b/multipathd/main.c
> index 7aaae773..ecad5a4f 100644
> --- a/multipathd/main.c
> +++ b/multipathd/main.c
> @@ -644,11 +644,44 @@ pr_register_active_paths(struct multipath *mpp)
> }
> }
>
> +static void
> +save_offline_paths(struct multipath *mpp, vector offline_paths)
const struct multipath *mpp ?
> +{
> + unsigned int i, j;
> + struct path *pp;
> + struct pathgroup *pgp;
> +
> + vector_foreach_slot (mpp->pg, pgp, i)
> + vector_foreach_slot (pgp->paths, pp, j)
> + if (pp->initialized == INIT_OK &&
> + pp->sysfs_state == PATH_DOWN)
> + store_path(offline_paths, pp);
You're not handling error from store_path here. I don't think you need
to, but perhaps indicate this by adding a comment or calling
(void)store_path(...).
> +}
> +
> +static void
> +handle_orphaned_offline_paths(vector offline_paths)
> +{
> + unsigned int i;
> + struct path *pp;
> +
> + vector_foreach_slot (offline_paths, pp, i)
> + if (pp->mpp == NULL)
> + pp->initialized = INIT_OFFLINE;
> +}
> +
> +static void
> +cleanup_reset_vec(struct vector_s **v)
> +{
> + vector_reset(*v);
> +}
> +
> static int
> update_map (struct multipath *mpp, struct vectors *vecs, int
> new_map)
> {
> int retries = 3;
> char *params __attribute__((cleanup(cleanup_charp))) = NULL;
> + struct vector_s offline_paths_vec = { .allocated = 0 };
> + vector offline_paths
> __attribute__((cleanup(cleanup_reset_vec))) = &offline_paths_vec;
>
> retry:
> condlog(4, "%s: updating new map", mpp->alias);
> @@ -685,6 +718,9 @@ fail:
> return 1;
> }
>
> + if (new_map && retries < 0)
> + save_offline_paths(mpp, offline_paths);
> +
> if (setup_multipath(vecs, mpp))
> return 1;
>
> @@ -695,6 +731,9 @@ fail:
> if (mpp->prflag == PRFLAG_SET)
> pr_register_active_paths(mpp);
>
> + if (VECTOR_SIZE(offline_paths) != 0)
> + handle_orphaned_offline_paths(offline_paths);
> +
> if (retries < 0)
> condlog(0, "%s: failed reload in new map update",
> mpp->alias);
> return 0;
> @@ -2793,7 +2832,8 @@ check_uninitialized_path(struct path * pp,
> unsigned int ticks)
> struct config *conf;
>
> if (pp->initialized != INIT_NEW && pp->initialized !=
> INIT_FAILED &&
> - pp->initialized != INIT_MISSING_UDEV)
> + pp->initialized != INIT_MISSING_UDEV &&
> + pp->initialized != INIT_OFFLINE)
> return CHECK_PATH_SKIPPED;
>
> if (pp->tick)
> @@ -2849,7 +2889,8 @@ update_uninitialized_path(struct vectors *
> vecs, struct path * pp)
> struct config *conf;
>
> if (pp->initialized != INIT_NEW && pp->initialized !=
> INIT_FAILED &&
> - pp->initialized != INIT_MISSING_UDEV)
> + pp->initialized != INIT_MISSING_UDEV &&
> + pp->initialized != INIT_OFFLINE)
> return CHECK_PATH_SKIPPED;
>
> newstate = get_new_state(pp);
> @@ -2875,6 +2916,19 @@ update_uninitialized_path(struct vectors *
> vecs, struct path * pp)
> free_path(pp);
> return CHECK_PATH_REMOVED;
> }
> + } else if (pp->initialized == INIT_OFFLINE &&
> + (newstate == PATH_UP || newstate == PATH_GHOST))
> {
> + pp->initialized = INIT_OK;
> + if (pp->recheck_wwid == RECHECK_WWID_ON &&
> + check_path_wwid_change(pp)) {
> + condlog(0, "%s: path wwid change detected.
> Removing",
> + pp->dev);
> + return handle_path_wwid_change(pp, vecs)?
> + CHECK_PATH_REMOVED :
> + CHECK_PATH_SKIPPED;
> + }
> + ev_add_path(pp, vecs, 1);
> + pp->tick = 1;
> }
> return CHECK_PATH_CHECKED;
> }