On Wed, 2018-03-14 at 12:46 -0500, Benjamin Marzinski wrote:
> The current method of waiting for dmevents on multipath devices
> involves
> creating a seperate thread for each device. This can become very
> wasteful when there are large numbers of multipath devices. Also,
> since
> multipathd needs to grab the vecs lock to update the devices, the
> additional threads don't actually provide much parallelism.
> 
> The patch adds a new method of updating multipath devices on
> dmevents,
> which uses the new device-mapper event polling interface. This means
> that there is only one dmevent waiting thread which will wait for
> events
> on all of the multipath devices.  Currently the code to get the event
> number from the list of device names and to re-arm the polling
> interface
> is not in libdevmapper, so the patch does that work. Obviously, these
> bits need to go into libdevmapper, so that multipathd can use a
> standard
> interface.
> 
> I haven't touched any of the existing event waiting code, since event
> polling was only added to device-mapper in version
> 4.37.0.  multipathd
> checks this version, and defaults to using the polling code if
> device-mapper supports it. This can be overridden by running
> multipathd
> with "-w", to force it to use the old event waiting code.
> 
> Signed-off-by: Benjamin Marzinski <[email protected]>

Reviewed-by: Martin Wilck <[email protected]>

... with some minor nitpicks, see below.

> ---
>  Makefile.inc               |   3 +
>  libmultipath/structs_vec.c |   8 +
>  libmultipath/structs_vec.h |   2 +
>  multipathd/Makefile        |   6 +-
>  multipathd/dmevents.c      | 392
> +++++++++++++++++++++++++++++++++++++++++++++
>  multipathd/dmevents.h      |  13 ++
>  multipathd/main.c          |  62 ++++++-
>  7 files changed, 477 insertions(+), 9 deletions(-)
>  create mode 100644 multipathd/dmevents.c
>  create mode 100644 multipathd/dmevents.h
> 
> 

> +static int arm_dm_event_poll(int fd)
> +{
> +     struct dm_ioctl dmi;
> +     memset(&dmi, 0, sizeof(dmi));
> +     dmi.version[0] = DM_VERSION_MAJOR;
> +     dmi.version[1] = DM_VERSION_MINOR;
> +     dmi.version[2] = DM_VERSION_PATCHLEVEL;
> +     dmi.flags = 0x4;

We've had this before, I'd appreciate a short comment explaining this
flag as Alasdair did in his post from Feb 13th.

> +
> +/* You must call update_multipath() after calling this function, to
> + * deal with any events that came in before the device was added */

This comment is slightly irritating, because you don't call
update_multipath() anywhere where you call watch_dmevents (rather, you
call __setup_multipath). I understand the comment is about the order of
calls, not about having to call update_multipath(), but that's not
immediately obvious.

> +int watch_dmevents(char *name)
> +{
> +     int event_nr;
> +     struct dev_event *dev_evt, *old_dev_evt;
> +     int i;
> +
> +     if (!dm_is_mpath(name)) {
> +             condlog(0, "%s: not a multipath device. can't watch
> events",
> +                     name);
> +             return -1;
> +     }
> +
> +     if ((event_nr = dm_geteventnr(name)) < 0)
> +             return -1;
> +
> +     dev_evt = (struct dev_event *)malloc(sizeof(struct
> dev_event));
> +     if (!dev_evt) {
> +             condlog(0, "%s: can't allocate event waiter
> structure", name);
> +             return -1;
> +     }
> +
> +     strncpy(dev_evt->name, name, WWID_SIZE);
> +     dev_evt->name[WWID_SIZE - 1] = 0;

Nitpick: using strlcpy() in new code would simplify code and review.

> +     dev_evt->evt_nr = event_nr;
> +     dev_evt->action = EVENT_NOTHING;
> +
> +     pthread_mutex_lock(&waiter->events_lock);
> +     vector_foreach_slot(waiter->events, old_dev_evt, i){
> +             if (!strcmp(dev_evt->name, old_dev_evt->name)) {
> +                     /* caller will be updating this device */
> +                     old_dev_evt->evt_nr = event_nr;
> +                     old_dev_evt->action = EVENT_NOTHING;
> +                     pthread_mutex_unlock(&waiter->events_lock);
> +                     condlog(2, "%s: already waiting for events
> on device",
> +                             name);

What about using condlog(3) here? Is it something admins need to care
about in regular operation?

> +                     free(dev_evt);
> +                     return 0;
> +             }
> +     }
> +     if (!vector_alloc_slot(waiter->events)) {
> +             pthread_mutex_unlock(&waiter->events_lock);
> +             free(dev_evt);
> +             return -1;
> +     }
> +     vector_set_slot(waiter->events, dev_evt);
> +     pthread_mutex_unlock(&waiter->events_lock);
> +     return 0;
> +}

> +/*
> + * returns the reschedule delay
> + * negative means *stop*
> + */
> +
> +/* poll, arm, update, return */
> +static int dmevent_loop (void)
> +{
> +     int r, i = 0;
> +     struct pollfd pfd;
> +     struct dev_event *dev_evt;
> +
> +     pfd.fd = waiter->fd;
> +     pfd.events = POLLIN;
> +     r = poll(&pfd, 1, -1);
> +     if (r <= 0) {
> +             condlog(0, "failed polling for dm events: %s",
> strerror(errno));
> +             /* sleep 1s and hope things get better */
> +             return 1;
> +     }
> +
> +     if (arm_dm_event_poll(waiter->fd) != 0) {
> +             condlog(0, "Cannot re-arm event polling: %s",
> strerror(errno));
> +             /* sleep 1s and hope things get better */
> +             return 1;
> +     }
> +
> +     if (dm_get_events() != 0) {
> +             condlog(0, "failed getting dm events: %s",
> strerror(errno));
> +             /* sleep 1s and hope things get better */
> +             return 1;
> +     }
> +
> +     /*
> +      * upon event ...
> +      */
> +
> +     while (1) {
> +             int done = 1;
> +             struct dev_event curr_dev;
> +
> +             pthread_mutex_lock(&waiter->events_lock);
> +             vector_foreach_slot(waiter->events, dev_evt, i) {
> +                     if (dev_evt->action != EVENT_NOTHING) {
> +                             curr_dev = *dev_evt;
> +                             if (dev_evt->action == EVENT_REMOVE)
> {
> +                                     vector_del_slot(waiter-
> >events, i);
> +                                     free(dev_evt);
> +                             } else
> +                                     dev_evt->action =
> EVENT_NOTHING;
> +                             done = 0;
> +                             break;
> +                     }
> +             }
> +             pthread_mutex_unlock(&waiter->events_lock);
> +             if (done)
> +                     return 1;
> +
> +             condlog(3, "%s: devmap event #%i", curr_dev.name,
> +                     curr_dev.evt_nr);
> +
> +             /*
> +              * event might be :
> +              *
> +              * 1) a table reload, which means our mpp structure
> is
> +              *    obsolete : refresh it through
> update_multipath()
> +              * 2) a path failed by DM : mark as such through
> +              *    update_multipath()
> +              * 3) map has gone away : stop the thread.
> +              * 4) a path reinstate : nothing to do
> +              * 5) a switch group : nothing to do
> +              */
> +             pthread_cleanup_push(cleanup_lock, &waiter->vecs-
> >lock);
> +             lock(&waiter->vecs->lock);
> +             pthread_testcancel();
> +             r = 0;
> +             if (curr_dev.action == EVENT_REMOVE)
> +                     remove_map_by_alias(curr_dev.name, waiter-
> >vecs, 1);
> +             else
> +                     r = update_multipath(waiter->vecs,
> curr_dev.name, 1);
> +             lock_cleanup_pop(&waiter->vecs->lock);

I dislike lock_cleanup_pop(). pthread_cleanup_pop(1) would be so much
more obvious.

Regards,
Martin

-- 
Dr. Martin Wilck <[email protected]>, Tel. +49 (0)911 74053 2107
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)

--
dm-devel mailing list
[email protected]
https://www.redhat.com/mailman/listinfo/dm-devel

Reply via email to