On Wed, Apr 02, 2025 at 06:42:34PM +0200, Martin Wilck wrote:
> On Mon, 2025-03-31 at 19:17 -0400, Benjamin Marzinski wrote:
> > When a new device is added by the multipath command, multipathd may
> > know
> > of other paths that cannot be added to the device because they are
> > currently offline. Instead of ignoring these paths, multipathd will
> > now
> > re-add them when they come back online. To do this, it multipathd
> > needs
> > a new path variable add_when_online, to track devices that could not
> > be
> > added to an existing multipath device because they were offline.
> > These
> > paths are handled along with the other uninitialized paths.
> > 
> > Signed-off-by: Benjamin Marzinski <bmarz...@redhat.com>
> > ---
> >  libmultipath/print.c       |  5 +++-
> >  libmultipath/structs.h     |  1 +
> >  libmultipath/structs_vec.c |  5 ++++
> >  multipathd/main.c          | 59
> > ++++++++++++++++++++++++++++++++++++--
> >  multipathd/multipathd.8.in |  5 ++--
> >  5 files changed, 70 insertions(+), 5 deletions(-)
> > 
> > diff --git a/libmultipath/print.c b/libmultipath/print.c
> > index 00c03ace..019ae56f 100644
> > --- a/libmultipath/print.c
> > +++ b/libmultipath/print.c
> > @@ -658,8 +658,11 @@ snprint_path_serial (struct strbuf *buff, const
> > struct path * pp)
> >  static int
> >  snprint_path_mpp (struct strbuf *buff, const struct path * pp)
> >  {
> > -   if (!pp->mpp)
> > +   if (!pp->mpp) {
> > +           if (pp->add_when_online)
> > +                   return append_strbuf_str(buff, "[offline]");
> >             return append_strbuf_str(buff, "[orphan]");
> > +   }
> >     if (!pp->mpp->alias)
> >             return append_strbuf_str(buff, "[unknown]");
> >     return snprint_str(buff, pp->mpp->alias);
> > diff --git a/libmultipath/structs.h b/libmultipath/structs.h
> > index 28de9a7f..39d1c71c 100644
> > --- a/libmultipath/structs.h
> > +++ b/libmultipath/structs.h
> > @@ -413,6 +413,7 @@ struct path {
> >     int eh_deadline;
> >     enum check_path_states is_checked;
> >     bool can_use_env_uid;
> > +   bool add_when_online;
> >     unsigned int checker_timeout;
> >     /* configlet pointers */
> >     vector hwe;
> > diff --git a/libmultipath/structs_vec.c b/libmultipath/structs_vec.c
> > index f6407e12..663c9053 100644
> > --- a/libmultipath/structs_vec.c
> > +++ b/libmultipath/structs_vec.c
> > @@ -389,6 +389,9 @@ static void orphan_paths(vector pathvec, struct
> > multipath *mpp, const char *reas
> >                             free_path(pp);
> >                     } else
> >                             orphan_path(pp, reason);
> > +           } else if (pp->add_when_online &&
> > +                      strncmp(mpp->wwid, pp->wwid, WWID_SIZE)
> > == 0) {
> > +                   pp->add_when_online = false;
> >             }
> >     }
> >  }
> > @@ -595,6 +598,8 @@ void sync_paths(struct multipath *mpp, vector
> > pathvec)
> >             found = 0;
> >             vector_foreach_slot(mpp->pg, pgp, j) {
> >                     if (find_slot(pgp->paths, (void *)pp) != -1)
> > {
> > +                           if (pp->add_when_online)
> > +                                   pp->add_when_online = false;
> >                             found = 1;
> >                             break;
> >                     }
> > diff --git a/multipathd/main.c b/multipathd/main.c
> > index 7aaae773..9aa5a2fa 100644
> > --- a/multipathd/main.c
> > +++ b/multipathd/main.c
> > @@ -644,11 +644,45 @@ pr_register_active_paths(struct multipath *mpp)
> >     }
> >  }
> >  
> > +static void
> > +save_offline_paths(const struct multipath *mpp, vector
> > offline_paths)
> > +{
> > +   unsigned int i, j;
> > +   struct path *pp;
> > +   struct pathgroup *pgp;
> > +
> > +   vector_foreach_slot (mpp->pg, pgp, i)
> > +           vector_foreach_slot (pgp->paths, pp, j)
> > +                   if (pp->initialized == INIT_OK &&
> > +                       pp->sysfs_state == PATH_DOWN)
> > +                           /* ignore failures storing the
> > paths. */
> > +                           store_path(offline_paths, pp);
> > +}
> > +
> > +static void
> > +handle_orphaned_offline_paths(vector offline_paths)
> > +{
> > +   unsigned int i;
> > +   struct path *pp;
> > +
> > +   vector_foreach_slot (offline_paths, pp, i)
> > +           if (pp->mpp == NULL)
> > +                   pp->add_when_online = true;
> > +}
> > +
> > +static void
> > +cleanup_reset_vec(struct vector_s **v)
> > +{
> > +   vector_reset(*v);
> > +}
> > +
> >  static int
> >  update_map (struct multipath *mpp, struct vectors *vecs, int
> > new_map)
> >  {
> >     int retries = 3;
> >     char *params __attribute__((cleanup(cleanup_charp))) = NULL;
> > +   struct vector_s offline_paths_vec = { .allocated = 0 };
> > +   vector offline_paths
> > __attribute__((cleanup(cleanup_reset_vec))) = &offline_paths_vec;
> >  
> >  retry:
> >     condlog(4, "%s: updating new map", mpp->alias);
> > @@ -685,6 +719,9 @@ fail:
> >             return 1;
> >     }
> >  
> > +   if (new_map && retries < 0)
> > +           save_offline_paths(mpp, offline_paths);
> > +
> >     if (setup_multipath(vecs, mpp))
> >             return 1;
> >  
> > @@ -695,6 +732,9 @@ fail:
> >     if (mpp->prflag == PRFLAG_SET)
> >             pr_register_active_paths(mpp);
> >  
> > +   if (VECTOR_SIZE(offline_paths) != 0)
> > +           handle_orphaned_offline_paths(offline_paths);
> > +
> >     if (retries < 0)
> >             condlog(0, "%s: failed reload in new map update",
> > mpp->alias);
> >     return 0;
> > @@ -2793,7 +2833,8 @@ check_uninitialized_path(struct path * pp,
> > unsigned int ticks)
> >     struct config *conf;
> >  
> >     if (pp->initialized != INIT_NEW && pp->initialized !=
> > INIT_FAILED &&
> > -       pp->initialized != INIT_MISSING_UDEV)
> > +       pp->initialized != INIT_MISSING_UDEV &&
> > +       !(pp->initialized == INIT_OK && pp->add_when_online))
> >             return CHECK_PATH_SKIPPED;
> >  
> >     if (pp->tick)
> > @@ -2849,7 +2890,8 @@ update_uninitialized_path(struct vectors *
> > vecs, struct path * pp)
> >     struct config *conf;
> >  
> >     if (pp->initialized != INIT_NEW && pp->initialized !=
> > INIT_FAILED &&
> > -       pp->initialized != INIT_MISSING_UDEV)
> > +       pp->initialized != INIT_MISSING_UDEV &&
> > +       !(pp->initialized == INIT_OK && pp->add_when_online))
> >             return CHECK_PATH_SKIPPED;
> >  
> >     newstate = get_new_state(pp);
> > @@ -2875,6 +2917,19 @@ update_uninitialized_path(struct vectors *
> > vecs, struct path * pp)
> >                     free_path(pp);
> >                     return CHECK_PATH_REMOVED;
> >             }
> > +   } else if (pp->initialized == INIT_OK && pp->add_when_online
> > &&
> > +              (newstate == PATH_UP || newstate == PATH_GHOST))
> > {
> > +           pp->initialized = INIT_OK;
> > +           if (pp->recheck_wwid == RECHECK_WWID_ON &&
> 
> I wonder if we should always check the WWID here. After all, this path
> has never been part of the map, and it was offline when the map was
> created (IOW, the WWID couldn't be checked at that point in time). 
> Can we rely on the stored WWID?

That make sense. I also noticed a typo in my documentation changes. I'll
send a new patch.

-Ben

> Other than that, LGTM.
> 
> Regards,
> Martin
> 
> > +               check_path_wwid_change(pp)) {
> > +                   condlog(0, "%s: path wwid change detected.
> > Removing",
> > +                           pp->dev);
> > +                   return handle_path_wwid_change(pp, vecs)?
> > +                                   CHECK_PATH_REMOVED :
> > +                                   CHECK_PATH_SKIPPED;
> > +           }
> > +           ev_add_path(pp, vecs, 1);
> > +           pp->tick = 1;
> >     }
> >     return CHECK_PATH_CHECKED;
> >  }
> > diff --git a/multipathd/multipathd.8.in b/multipathd/multipathd.8.in
> > index 43f87bf8..a91acff1 100644
> > --- a/multipathd/multipathd.8.in
> > +++ b/multipathd/multipathd.8.in
> > @@ -595,8 +595,9 @@ The device serial number.
> >  The device marginal state, either \fImarginal\fR or \fInormal\fR.
> >  .TP
> >  .B %m
> > -The multipath device that this device is a path of, or
> > \fI[orphan]\fR if
> > -it is not part of any multipath device.
> > +The multipath device that this device is a path of, or
> > \fI[offline]\fR
> > +if this device could not be added to a device because is is offline
> > or
> > +\fI[orphan]\fR if it is not part of any multipath device.
> >  .TP
> >  .B %N
> >  The host World Wide Node Name (WWNN) of the device, if any.


Reply via email to