On Wed, 2024-07-17 at 14:11 -0400, Benjamin Marzinski wrote:
> multipathd's path checking can be very bursty, with all the paths
> running their path checkers at the same time, and then all doing
> nothing
> while waiting for the next check time. Alternatively, the paths in a
> multipath device might all run their path checkers at a different
> time,
> which can keep the multipath device from having a coherent view of
> the
> state of all of its paths.
> 
> This patch makes all the paths of a multipath device converge to
> running
> their checkers at the same time, and spreads out when this time is
> for
> the different multipath devices.
> 
> To do this, the checking time is divided into adjustment intervals
> (conf->adjust_int), so that the checkers run at some index within
> this
> interval. conf->adjust_int is chosen so that it is a multiple of all
> possible pp->checkint values. This means that regardless of
> pp->checkint, the path should always be checked on the same indexes,
> each adjustement interval.
> 
> Each multipath device has a goal index. These are evenly spread out
> between 0 and conf->max_checkint. Every conf->adjust_int seconds,
> each
> multipath device should try to check all of its paths on its goal
> index.
> If the multipath device's check times are not aligned with the goal
> index, then pp->tick for the next check will be decremented by one,
> to
> align it over time.
> 
> In order for the path checkers to run every pp->checkint seconds,
> multipathd needs to track how long a path check has been pending for,
> and subtract that time from the number of ticks till the checker is
> run
> again. If the checker has been pending for more that pp->checkint,
> the path will be rechecked on the next tick after the checker
> returns.
> 
> Signed-off-by: Benjamin Marzinski <[email protected]>
> ---
>  libmultipath/config.c  | 12 ++++++
>  libmultipath/config.h  |  1 +
>  libmultipath/structs.c |  1 +
>  libmultipath/structs.h |  1 +
>  multipathd/main.c      | 91 ++++++++++++++++++++++++++++++++++------
> --
>  5 files changed, 89 insertions(+), 17 deletions(-)
> 
> diff --git a/libmultipath/config.c b/libmultipath/config.c
> index 83fa7369..a59533b5 100644
> --- a/libmultipath/config.c
> +++ b/libmultipath/config.c
> @@ -982,6 +982,18 @@ int _init_config (const char *file, struct
> config *conf)
>               conf->checkint = conf->max_checkint;
>       condlog(3, "polling interval: %d, max: %d",
>               conf->checkint, conf->max_checkint);
> +     /*
> +      * make sure that that adjust_int is a multiple of all
> possible values
> +      * of pp->checkint.
> +      */
> +     if (conf->max_checkint % conf->checkint == 0) {
> +             conf->adjust_int = conf->max_checkint;
> +     } else {
> +             conf->adjust_int = conf->checkint;
> +             while (2 * conf->adjust_int < conf->max_checkint)
> +                     conf->adjust_int *= 2;
> +             conf->adjust_int *= conf->max_checkint;
> +     }
>  
>       if (conf->blist_devnode == NULL) {
>               conf->blist_devnode = vector_alloc();
> diff --git a/libmultipath/config.h b/libmultipath/config.h
> index 384193ab..800c0ca9 100644
> --- a/libmultipath/config.h
> +++ b/libmultipath/config.h
> @@ -147,6 +147,7 @@ struct config {
>       int minio_rq;
>       unsigned int checkint;
>       unsigned int max_checkint;
> +     unsigned int adjust_int;
>       bool use_watchdog;
>       int pgfailback;
>       int rr_weight;
> diff --git a/libmultipath/structs.c b/libmultipath/structs.c
> index 0a26096a..232b4230 100644
> --- a/libmultipath/structs.c
> +++ b/libmultipath/structs.c
> @@ -149,6 +149,7 @@ uninitialize_path(struct path *pp)
>       pp->state = PATH_UNCHECKED;
>       pp->uid_attribute = NULL;
>       pp->checker_timeout = 0;
> +     pp->pending_ticks = 0;
>  
>       if (checker_selected(&pp->checker))
>               checker_put(&pp->checker);
> diff --git a/libmultipath/structs.h b/libmultipath/structs.h
> index 002eeae1..457d7836 100644
> --- a/libmultipath/structs.h
> +++ b/libmultipath/structs.h
> @@ -360,6 +360,7 @@ struct path {
>       unsigned long long size;
>       unsigned int checkint;
>       unsigned int tick;
> +     unsigned int pending_ticks;
>       int bus;
>       int offline;
>       int state;
> diff --git a/multipathd/main.c b/multipathd/main.c
> index 84450906..87ddb101 100644
> --- a/multipathd/main.c
> +++ b/multipathd/main.c
> @@ -2388,7 +2388,7 @@ enum check_path_return {
>  };
>  
>  static int
> -check_path (struct vectors * vecs, struct path * pp, unsigned int
> ticks)
> +do_check_path (struct vectors * vecs, struct path * pp)
>  {
>       int newstate;
>       int new_path_up = 0;
> @@ -2400,14 +2400,6 @@ check_path (struct vectors * vecs, struct path
> * pp, unsigned int ticks)
>       int marginal_pathgroups, marginal_changed = 0;
>       bool need_reload;
>  
> -     if (pp->initialized == INIT_REMOVED)
> -             return CHECK_PATH_SKIPPED;
> -
> -     if (pp->tick)
> -             pp->tick -= (pp->tick > ticks) ? ticks : pp->tick;
> -     if (pp->tick)
> -             return CHECK_PATH_SKIPPED;
> -
>       conf = get_multipath_config();
>       checkint = conf->checkint;
>       max_checkint = conf->max_checkint;
> @@ -2419,12 +2411,6 @@ check_path (struct vectors * vecs, struct path
> * pp, unsigned int ticks)
>               pp->checkint = checkint;
>       };
>  
> -     /*
> -      * provision a next check soonest,
> -      * in case we exit abnormally from here
> -      */
> -     pp->tick = checkint;
> -
>       newstate = check_path_state(pp);
>       if (newstate == PATH_WILD || newstate == PATH_UNCHECKED)
>               return CHECK_PATH_SKIPPED;
> @@ -2587,7 +2573,6 @@ check_path (struct vectors * vecs, struct path
> * pp, unsigned int ticks)
>                               condlog(4, "%s: delay next check
> %is",
>                                       pp->dev_t, pp->checkint);
>                       }
> -                     pp->tick = pp->checkint;
>               }
>       }
>       else if (newstate != PATH_UP && newstate != PATH_GHOST) {
> @@ -2640,6 +2625,77 @@ check_path (struct vectors * vecs, struct path
> * pp, unsigned int ticks)
>       return CHECK_PATH_CHECKED;
>  }
>  
> +static int
> +check_path (struct vectors * vecs, struct path * pp, unsigned int
> ticks,
> +         time_t start_secs)
> +{
> +     int r;
> +     unsigned int adjust_int, max_checkint;
> +     struct config *conf;
> +     time_t next_idx, goal_idx;
> +
> +     if (pp->initialized == INIT_REMOVED)
> +             return CHECK_PATH_SKIPPED;
> +
> +     if (pp->tick)
> +             pp->tick -= (pp->tick > ticks) ? ticks : pp->tick;
> +     if (pp->tick)
> +             return CHECK_PATH_SKIPPED;
> +
> +     conf = get_multipath_config();
> +     max_checkint = conf->max_checkint;
> +     adjust_int = conf->adjust_int;
> +     put_multipath_config(conf);
> +
> +     r = do_check_path(vecs, pp);
> +
> +     /*
> +      * do_check_path() removed or orphaned the path.
> +      */
> +     if (r < 0 || !pp->mpp)
> +             return r;

do_check_path() returns an enum now, so this should be r ==
CHECK_PATH_REMOVED.



Reply via email to