[dm-devel] [RFC PATCH 2/5] move waiter code from libmultipath to multipathd

2018-02-09 Thread Benjamin Marzinski
Only multipathd uses the code in waiter.[ch] and the functions that call
it directly, so they should all live in the multipathd directory.  This
patch is simply moving the waiter.[ch] files and the functions in
structs_vec that use them. None of the moved code has been changed.

Signed-off-by: Benjamin Marzinski 
---
 libmultipath/Makefile  |   2 +-
 libmultipath/structs_vec.c |  98 -
 libmultipath/structs_vec.h |   4 +-
 libmultipath/waiter.c  | 215 -
 libmultipath/waiter.h  |  17 
 multipathd/Makefile|   2 +-
 multipathd/main.c  |  96 
 multipathd/waiter.c| 215 +
 multipathd/waiter.h|  17 
 9 files changed, 332 insertions(+), 334 deletions(-)
 delete mode 100644 libmultipath/waiter.c
 delete mode 100644 libmultipath/waiter.h
 create mode 100644 multipathd/waiter.c
 create mode 100644 multipathd/waiter.h

diff --git a/libmultipath/Makefile b/libmultipath/Makefile
index 6447d8d..a1005b2 100644
--- a/libmultipath/Makefile
+++ b/libmultipath/Makefile
@@ -42,7 +42,7 @@ OBJS = memory.o parser.o vector.o devmapper.o callout.o \
pgpolicies.o debug.o defaults.o uevent.o time-util.o \
switchgroup.o uxsock.o print.o alias.o log_pthread.o \
log.o configure.o structs_vec.o sysfs.o prio.o checkers.o \
-   lock.o waiter.o file.o wwids.o prioritizers/alua_rtpg.o prkey.o \
+   lock.o file.o wwids.o prioritizers/alua_rtpg.o prkey.o \
io_err_stat.o
 
 all: $(LIBS)
diff --git a/libmultipath/structs_vec.c b/libmultipath/structs_vec.c
index abf5327..77b045b 100644
--- a/libmultipath/structs_vec.c
+++ b/libmultipath/structs_vec.c
@@ -10,7 +10,6 @@
 #include "structs.h"
 #include "structs_vec.h"
 #include "sysfs.h"
-#include "waiter.h"
 #include "devmapper.h"
 #include "dmparser.h"
 #include "propsel.h"
@@ -107,17 +106,6 @@ void orphan_paths(vector pathvec, struct multipath *mpp)
}
 }
 
-static void
-set_multipath_wwid (struct multipath * mpp)
-{
-   if (strlen(mpp->wwid))
-   return;
-
-   dm_get_uuid(mpp->alias, mpp->wwid);
-}
-
-#define PURGE_VEC 1
-
 void
 remove_map(struct multipath * mpp, struct vectors * vecs, int purge_vec)
 {
@@ -379,92 +367,6 @@ sync_map_state(struct multipath *mpp)
}
 }
 
-int
-update_map (struct multipath *mpp, struct vectors *vecs)
-{
-   int retries = 3;
-   char params[PARAMS_SIZE] = {0};
-
-retry:
-   condlog(4, "%s: updating new map", mpp->alias);
-   if (adopt_paths(vecs->pathvec, mpp)) {
-   condlog(0, "%s: failed to adopt paths for new map update",
-   mpp->alias);
-   retries = -1;
-   goto fail;
-   }
-   verify_paths(mpp, vecs);
-   mpp->action = ACT_RELOAD;
-
-   extract_hwe_from_path(mpp);
-   if (setup_map(mpp, params, PARAMS_SIZE)) {
-   condlog(0, "%s: failed to setup new map in update", mpp->alias);
-   retries = -1;
-   goto fail;
-   }
-   if (domap(mpp, params, 1) <= 0 && retries-- > 0) {
-   condlog(0, "%s: map_udate sleep", mpp->alias);
-   sleep(1);
-   goto retry;
-   }
-   dm_lib_release();
-
-fail:
-   if (setup_multipath(vecs, mpp))
-   return 1;
-
-   sync_map_state(mpp);
-
-   if (retries < 0)
-   condlog(0, "%s: failed reload in new map update", mpp->alias);
-   return 0;
-}
-
-struct multipath *add_map_without_path (struct vectors *vecs, char *alias)
-{
-   struct multipath * mpp = alloc_multipath();
-   struct config *conf;
-
-   if (!mpp)
-   return NULL;
-   if (!alias) {
-   FREE(mpp);
-   return NULL;
-   }
-
-   mpp->alias = STRDUP(alias);
-
-   if (dm_get_info(mpp->alias, &mpp->dmi)) {
-   condlog(3, "%s: cannot access table", mpp->alias);
-   goto out;
-   }
-   set_multipath_wwid(mpp);
-   conf = get_multipath_config();
-   mpp->mpe = find_mpe(conf->mptable, mpp->wwid);
-   put_multipath_config(conf);
-
-   if (update_multipath_table(mpp, vecs->pathvec, 1))
-   goto out;
-   if (update_multipath_status(mpp))
-   goto out;
-
-   if (!vector_alloc_slot(vecs->mpvec))
-   goto out;
-
-   vector_set_slot(vecs->mpvec, mpp);
-
-   if (update_map(mpp, vecs) != 0) /* map removed */
-   return NULL;
-
-   if (start_waiter_thread(mpp, vecs))
-   goto out;
-
-   return mpp;
-out:
-   remove_map(mpp, vecs, PURGE_VEC);
-   return NULL;
-}
-
 static void
 find_existing_alias (struct multipath * mpp,
 struct vectors *vecs)
diff --git a/libmultipath/structs_vec.h b/libmultipath/structs_vec.h
index d6e17bb..ceab6d9 100644
--- a/libmultipath/structs_vec.h
+++ 

[dm-devel] [RFC PATCH 4/5] libmultipath: add helper functions

2018-02-09 Thread Benjamin Marzinski
Add the ability to reset a vector without completely freeing it, and to
check the version of the device-mapper module.  The existing version
checking code checks the version of a specific device mapper target, and
has been renamed for clarity's sake. These functions will be used in a
later patch.

Signed-off-by: Benjamin Marzinski 
---
 libmultipath/devmapper.c | 28 
 libmultipath/devmapper.h |  3 ++-
 libmultipath/vector.c| 16 
 libmultipath/vector.h|  1 +
 multipathd/main.c|  2 +-
 5 files changed, 40 insertions(+), 10 deletions(-)

diff --git a/libmultipath/devmapper.c b/libmultipath/devmapper.c
index 573fc75..2960bf5 100644
--- a/libmultipath/devmapper.c
+++ b/libmultipath/devmapper.c
@@ -132,7 +132,27 @@ dm_lib_prereq (void)
 }
 
 int
-dm_drv_version (unsigned int * version, char * str)
+dm_drv_version(unsigned int *v)
+{
+   char buff[64];
+
+   v[0] = 0;
+   v[1] = 0;
+   v[2] = 0;
+
+   if (!dm_driver_version(buff, sizeof(buff))) {
+   condlog(0, "cannot get kernel dm version");
+   return 1;
+   }
+   if (sscanf(buff, "%u.%u.%u ", &v[0], &v[1], &v[2]) != 3) {
+   condlog(0, "invalid kernel dm version '%s'", buff);
+   return 1;
+   }
+   return 0;
+}
+
+int
+dm_tgt_version (unsigned int * version, char * str)
 {
int r = 2;
struct dm_task *dmt;
@@ -179,13 +199,13 @@ out:
 }
 
 static int
-dm_drv_prereq (unsigned int *ver)
+dm_tgt_prereq (unsigned int *ver)
 {
unsigned int minv[3] = {1, 0, 3};
unsigned int version[3] = {0, 0, 0};
unsigned int * v = version;
 
-   if (dm_drv_version(v, TGT_MPATH)) {
+   if (dm_tgt_version(v, TGT_MPATH)) {
/* in doubt return not capable */
return 1;
}
@@ -210,7 +230,7 @@ static int dm_prereq(unsigned int *v)
 {
if (dm_lib_prereq())
return 1;
-   return dm_drv_prereq(v);
+   return dm_tgt_prereq(v);
 }
 
 static int libmp_dm_udev_sync = 0;
diff --git a/libmultipath/devmapper.h b/libmultipath/devmapper.h
index 62e14d1..52d4af8 100644
--- a/libmultipath/devmapper.h
+++ b/libmultipath/devmapper.h
@@ -28,7 +28,8 @@ void dm_init(int verbosity);
 void libmp_dm_init(void);
 void libmp_udev_set_sync_support(int on);
 struct dm_task *libmp_dm_task_create(int task);
-int dm_drv_version (unsigned int * version, char * str);
+int dm_drv_version (unsigned int * version);
+int dm_tgt_version (unsigned int * version, char * str);
 int dm_simplecmd_flush (int, const char *, uint16_t);
 int dm_simplecmd_noflush (int, const char *, uint16_t);
 int dm_addmap_create (struct multipath *mpp, char *params);
diff --git a/libmultipath/vector.c b/libmultipath/vector.c
index 6266e0a..f741ae0 100644
--- a/libmultipath/vector.c
+++ b/libmultipath/vector.c
@@ -145,18 +145,26 @@ vector_repack(vector v)
vector_del_slot(v, i--);
 }
 
-/* Free memory vector allocation */
-void
-vector_free(vector v)
+vector
+vector_reset(vector v)
 {
if (!v)
-   return;
+   return NULL;
 
if (v->slot)
FREE(v->slot);
 
v->allocated = 0;
v->slot = NULL;
+   return v;
+}
+
+/* Free memory vector allocation */
+void
+vector_free(vector v)
+{
+   if (!vector_reset(v))
+   return;
FREE(v);
 }
 
diff --git a/libmultipath/vector.h b/libmultipath/vector.h
index 5cfd4d0..d69cd0b 100644
--- a/libmultipath/vector.h
+++ b/libmultipath/vector.h
@@ -45,6 +45,7 @@ typedef struct _vector *vector;
 /* Prototypes */
 extern vector vector_alloc(void);
 extern void *vector_alloc_slot(vector v);
+vector vector_reset(vector v);
 extern void vector_free(vector v);
 extern void free_strvec(vector strvec);
 extern void vector_set_slot(vector v, void *value);
diff --git a/multipathd/main.c b/multipathd/main.c
index efc39d7..2963bde 100644
--- a/multipathd/main.c
+++ b/multipathd/main.c
@@ -2228,7 +2228,7 @@ reconfigure (struct vectors * vecs)
/* Re-read any timezone changes */
tzset();
 
-   dm_drv_version(conf->version, TGT_MPATH);
+   dm_tgt_version(conf->version, TGT_MPATH);
if (verbosity)
conf->verbosity = verbosity;
if (bindings_read_only)
-- 
2.7.4

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel


[dm-devel] [RFC PATCH 5/5] multipathd: RFC add new polling dmevents waiter thread

2018-02-09 Thread Benjamin Marzinski
The current method of waiting for dmevents on multipath devices involves
creating a separate thread for each device. This can become very
wasteful when there are large numbers of multipath devices. Also, since
multipathd needs to grab the vecs lock to update the devices, the
additional threads don't actually provide much parallelism.

The patch adds a new method of updating multipath devices on dmevents,
which uses the new device-mapper event polling interface. This means
that there is only one dmevent waiting thread which will wait for events
on all of the multipath devices.  Currently the code to get the event
number from the list of device names and to re-arm the polling interface
is not in libdevmapper, so the patch does that work. Obviously, these
bits need to go into libdevmapper, so that multipathd can use a standard
interface.

I haven't touched any of the existing event waiting code, since event
polling was only added to device-mapper in version 4.37.0.  multipathd
checks this version, and defaults to using the polling code if
device-mapper supports it. This can be overridden by running multipathd
with "-w", to force it to use the old event waiting code.

Signed-off-by: Benjamin Marzinski 
---
 multipathd/Makefile   |   3 +-
 multipathd/dmevents.c | 396 ++
 multipathd/dmevents.h |  13 ++
 multipathd/main.c |  58 +++-
 4 files changed, 461 insertions(+), 9 deletions(-)
 create mode 100644 multipathd/dmevents.c
 create mode 100644 multipathd/dmevents.h

diff --git a/multipathd/Makefile b/multipathd/Makefile
index 85f29a7..4c438f0 100644
--- a/multipathd/Makefile
+++ b/multipathd/Makefile
@@ -22,7 +22,8 @@ ifdef SYSTEMD
endif
 endif
 
-OBJS = main.o pidfile.o uxlsnr.o uxclnt.o cli.o cli_handlers.o waiter.o
+OBJS = main.o pidfile.o uxlsnr.o uxclnt.o cli.o cli_handlers.o waiter.o \
+   dmevents.o
 
 EXEC = multipathd
 
diff --git a/multipathd/dmevents.c b/multipathd/dmevents.c
new file mode 100644
index 000..a56c055
--- /dev/null
+++ b/multipathd/dmevents.c
@@ -0,0 +1,396 @@
+/*
+ * Copyright (c) 2004, 2005 Christophe Varoqui
+ * Copyright (c) 2005 Kiyoshi Ueda, NEC
+ * Copyright (c) 2005 Edward Goggin, EMC
+ * Copyright (c) 2005, 2018 Benjamin Marzinski, Redhat
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "vector.h"
+#include "structs.h"
+#include "structs_vec.h"
+#include "devmapper.h"
+#include "debug.h"
+#include "dmevents.h"
+
+#ifndef DM_DEV_ARM_POLL
+#define DM_DEV_ARM_POLL _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD + 1, struct dm_ioctl)
+#endif
+
+enum event_actions {
+   EVENT_NOTHING,
+   EVENT_REMOVE,
+   EVENT_UPDATE,
+};
+
+struct dev_event {
+   char name[WWID_SIZE];
+   uint32_t evt_nr;
+   enum event_actions action;
+};
+
+struct dmevent_waiter {
+   int fd;
+   struct vectors *vecs;
+   vector events;
+   pthread_mutex_t events_lock;
+};
+
+static struct dmevent_waiter *waiter;
+
+int dmevent_poll_supported(void)
+{
+   unsigned int minv[3] = {4, 37, 0};
+   unsigned int v[3];
+
+   if (dm_drv_version(v))
+   return 0;
+
+   if (VERSION_GE(v, minv))
+   return 1;
+   return 0;
+}
+
+
+int alloc_dmevent_waiter(struct vectors *vecs)
+{
+   if (!vecs) {
+   condlog(0, "can't create waiter structure. invalid vectors");
+   goto fail;
+   }
+   waiter = (struct dmevent_waiter *)malloc(sizeof(struct dmevent_waiter));
+   if (!waiter) {
+   condlog(0, "failed to allocate waiter structure");
+   goto fail;
+   }
+   memset(waiter, 0, sizeof(struct dmevent_waiter));
+   waiter->events = vector_alloc();
+   if (!waiter->events) {
+   condlog(0, "failed to allocate waiter events vector");
+   goto fail_waiter;
+   }
+   waiter->fd = open("/dev/mapper/control", O_RDWR);
+   if (waiter->fd < 0) {
+   condlog(0, "failed to open /dev/mapper/control for waiter");
+   goto fail_events;
+   }
+   pthread_mutex_init(&waiter->events_lock, NULL);
+   waiter->vecs = vecs;
+
+   return 0;
+fail_events:
+   vector_free(waiter->events);
+fail_waiter:
+   free(waiter);
+fail:
+   waiter = NULL;
+   return -1;
+}
+
+void free_dmevent_waiter(void)
+{
+   struct dev_event *dev_evt;
+   int i;
+
+   if (!waiter)
+   return;
pthread_mutex_destroy(&waiter->events_lock);
+   close(waiter->fd);
+   vector_foreach_slot(waiter->events, dev_evt, i)
+   free(dev_evt);
+   vector_free(waiter->events);
+   free(waiter);
+   waiter = NULL;
+}
+
+static int arm_dm_event_poll(int fd)
+{
+   struct dm_ioctl dmi;
+   memset(&dmi, 0, sizeof(dmi));
+   dmi.version[0] = DM_VERSION_MAJOR;
+   dmi.version[1] = DM_VERSION_MINOR;
+

[dm-devel] [RFC PATCH 3/5] call start_waiter_thread() before setup_multipath()

2018-02-09 Thread Benjamin Marzinski
If setup_multipath() is called before the waiter thread has started,
there is a window where a dm event can occur between when
setup_multipath() updates the device state and when the waiter thread
starts waiting for new events, causing the new event to be missed and
the multipath device to not get updated.

Signed-off-by: Benjamin Marzinski 
---
 multipathd/main.c | 37 -
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/multipathd/main.c b/multipathd/main.c
index 94b2406..efc39d7 100644
--- a/multipathd/main.c
+++ b/multipathd/main.c
@@ -321,7 +321,7 @@ set_multipath_wwid (struct multipath * mpp)
 }
 
 static int
-update_map (struct multipath *mpp, struct vectors *vecs)
+update_map (struct multipath *mpp, struct vectors *vecs, int new_map)
 {
int retries = 3;
char params[PARAMS_SIZE] = {0};
@@ -351,6 +351,12 @@ retry:
dm_lib_release();
 
 fail:
+   if (new_map && (retries < 0 || start_waiter_thread(mpp, vecs))) {
+   condlog(0, "%s: failed to create new map", mpp->alias);
+   remove_map(mpp, vecs, 1);
+   return 1;
+   }
+
if (setup_multipath(vecs, mpp))
return 1;
 
@@ -395,12 +401,9 @@ add_map_without_path (struct vectors *vecs, char *alias)
 
vector_set_slot(vecs->mpvec, mpp);
 
-   if (update_map(mpp, vecs) != 0) /* map removed */
+   if (update_map(mpp, vecs, 1) != 0) /* map removed */
return NULL;
 
-   if (start_waiter_thread(mpp, vecs))
-   goto out;
-
return mpp;
 out:
remove_map(mpp, vecs, PURGE_VEC);
@@ -554,7 +557,7 @@ ev_add_map (char * dev, char * alias, struct vectors * vecs)
if (mpp->wait_for_udev > 1) {
condlog(2, "%s: performing delayed actions",
mpp->alias);
-   if (update_map(mpp, vecs))
+   if (update_map(mpp, vecs, 0))
/* setup multipathd removed the map */
return 1;
}
@@ -865,6 +868,11 @@ retry:
}
dm_lib_release();
 
+   if ((mpp->action == ACT_CREATE ||
+(mpp->action == ACT_NOTHING && start_waiter && !mpp->waiter)) &&
+   start_waiter_thread(mpp, vecs))
+   goto fail_map;
+
/*
 * update our state from kernel regardless of create or reload
 */
@@ -873,11 +881,6 @@ retry:
 
sync_map_state(mpp);
 
-   if ((mpp->action == ACT_CREATE ||
-(mpp->action == ACT_NOTHING && start_waiter && !mpp->waiter)) &&
-   start_waiter_thread(mpp, vecs))
-   goto fail_map;
-
if (retries >= 0) {
condlog(2, "%s [%s]: path added to devmap %s",
pp->dev, pp->dev_t, mpp->alias);
@@ -1479,7 +1482,8 @@ missing_uev_wait_tick(struct vectors *vecs)
if (mpp->wait_for_udev && --mpp->uev_wait_tick <= 0) {
timed_out = 1;
condlog(0, "%s: timeout waiting on creation uevent. 
enabling reloads", mpp->alias);
-   if (mpp->wait_for_udev > 1 && update_map(mpp, vecs)) {
+   if (mpp->wait_for_udev > 1 &&
+   update_map(mpp, vecs, 0)) {
/* update_map removed map */
i--;
continue;
@@ -1511,7 +1515,7 @@ ghost_delay_tick(struct vectors *vecs)
condlog(0, "%s: timed out waiting for active path",
mpp->alias);
mpp->force_udev_reload = 1;
-   if (update_map(mpp, vecs) != 0) {
+   if (update_map(mpp, vecs, 0) != 0) {
/* update_map removed map */
i--;
continue;
@@ -2169,14 +2173,13 @@ configure (struct vectors * vecs)
 * start dm event waiter threads for these new maps
 */
vector_foreach_slot(vecs->mpvec, mpp, i) {
-   if (setup_multipath(vecs, mpp)) {
-   i--;
-   continue;
-   }
if (start_waiter_thread(mpp, vecs)) {
remove_map(mpp, vecs, 1);
i--;
+   continue;
}
+   if (setup_multipath(vecs, mpp))
+   i--;
}
return 0;
 }
-- 
2.7.4

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel


[dm-devel] [RFC PATCH 1/5] libmultipath: move remove_map waiter code to multipathd

2018-02-09 Thread Benjamin Marzinski
Only multipathd needs to worry about the multipath waiter code. There is
no point in having remove_map_and_stop_waiter() or
remove_maps_and_stop_waiters() in libmultipath, since they should never
be used outside of multipathd.

Signed-off-by: Benjamin Marzinski 
---
 libmultipath/structs_vec.c | 40 +---
 libmultipath/structs_vec.h |  2 --
 multipathd/main.c  | 23 +++
 3 files changed, 28 insertions(+), 37 deletions(-)

diff --git a/libmultipath/structs_vec.c b/libmultipath/structs_vec.c
index 0de2221..abf5327 100644
--- a/libmultipath/structs_vec.c
+++ b/libmultipath/structs_vec.c
@@ -116,25 +116,16 @@ set_multipath_wwid (struct multipath * mpp)
dm_get_uuid(mpp->alias, mpp->wwid);
 }
 
-#define KEEP_WAITER 0
-#define STOP_WAITER 1
 #define PURGE_VEC 1
 
-static void
-_remove_map (struct multipath * mpp, struct vectors * vecs,
-   int stop_waiter, int purge_vec)
+void
+remove_map(struct multipath * mpp, struct vectors * vecs, int purge_vec)
 {
int i;
 
condlog(4, "%s: remove multipath map", mpp->alias);
 
/*
-* stop the DM event waiter thread
-*/
-   if (stop_waiter)
-   stop_waiter_thread(mpp, vecs);
-
-   /*
 * clear references to this map
 */
orphan_paths(vecs->pathvec, mpp);
@@ -149,19 +140,8 @@ _remove_map (struct multipath * mpp, struct vectors * vecs,
free_multipath(mpp, KEEP_PATHS);
 }
 
-void remove_map(struct multipath *mpp, struct vectors *vecs, int purge_vec)
-{
-   _remove_map(mpp, vecs, KEEP_WAITER, purge_vec);
-}
-
-void remove_map_and_stop_waiter(struct multipath *mpp, struct vectors *vecs,
-   int purge_vec)
-{
-   _remove_map(mpp, vecs, STOP_WAITER, purge_vec);
-}
-
-static void
-_remove_maps (struct vectors * vecs, int stop_waiter)
+void
+remove_maps(struct vectors * vecs)
 {
int i;
struct multipath * mpp;
@@ -170,7 +150,7 @@ _remove_maps (struct vectors * vecs, int stop_waiter)
return;
 
vector_foreach_slot (vecs->mpvec, mpp, i) {
-   _remove_map(mpp, vecs, stop_waiter, 1);
+   remove_map(mpp, vecs, 1);
i--;
}
 
@@ -178,16 +158,6 @@ _remove_maps (struct vectors * vecs, int stop_waiter)
vecs->mpvec = NULL;
 }
 
-void remove_maps(struct vectors *vecs)
-{
-   _remove_maps(vecs, KEEP_WAITER);
-}
-
-void remove_maps_and_stop_waiters(struct vectors *vecs)
-{
-   _remove_maps(vecs, STOP_WAITER);
-}
-
 void
 extract_hwe_from_path(struct multipath * mpp)
 {
diff --git a/libmultipath/structs_vec.h b/libmultipath/structs_vec.h
index b81413b..d6e17bb 100644
--- a/libmultipath/structs_vec.h
+++ b/libmultipath/structs_vec.h
@@ -27,9 +27,7 @@ int update_multipath_strings (struct multipath *mpp, vector 
pathvec,
 void extract_hwe_from_path(struct multipath * mpp);
 
 void remove_map (struct multipath * mpp, struct vectors * vecs, int purge_vec);
-void remove_map_and_stop_waiter (struct multipath * mpp, struct vectors * 
vecs, int purge_vec);
 void remove_maps (struct vectors * vecs);
-void remove_maps_and_stop_waiters (struct vectors * vecs);
 
 void sync_map_state (struct multipath *);
 int update_map (struct multipath *mpp, struct vectors *vecs);
diff --git a/multipathd/main.c b/multipathd/main.c
index 7ac59d9..72c3c2f 100644
--- a/multipathd/main.c
+++ b/multipathd/main.c
@@ -288,6 +288,29 @@ switch_pathgroup (struct multipath * mpp)
 mpp->alias, mpp->bestpg);
 }
 
+static void
+remove_map_and_stop_waiter(struct multipath *mpp, struct vectors *vecs,
+  int purge_vec)
+{
+   stop_waiter_thread(mpp, vecs);
+   remove_map(mpp, vecs, purge_vec);
+}
+
+static void
+remove_maps_and_stop_waiters(struct vectors *vecs)
+{
+   int i;
+   struct multipath * mpp;
+
+   if (!vecs)
+   return;
+
+   vector_foreach_slot(vecs->mpvec, mpp, i)
+   stop_waiter_thread(mpp, vecs);
+
+   remove_maps(vecs);
+}
+
 static int
 coalesce_maps(struct vectors *vecs, vector nmpv)
 {
-- 
2.7.4

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel


Re: [dm-devel] [PATCH v2 1/7] libmultipath: fix tur checker locking

2018-02-09 Thread Benjamin Marzinski
On Sat, Feb 10, 2018 at 12:36:05AM +0100, Martin Wilck wrote:
> On Sat, 2018-02-10 at 00:28 +0100, Martin Wilck wrote:
> > Maybe it's easier than we thought. Attached is a patch on top of
> > yours that I think might work, please have a look. 
> > 
> 
> That one didn't even compile. This one is better.
> 
> Martin

How about this one instead. The idea is that once we are in the cleanup
handler, we just cleanup and exit. But before we enter it, we atomically
exchange running, and if running was 0, we pause(), since the checker is
either about to cancel us, or already has.

diff --git a/libmultipath/checkers/tur.c b/libmultipath/checkers/tur.c
index 894ad41..3774a17 100644
--- a/libmultipath/checkers/tur.c
+++ b/libmultipath/checkers/tur.c
@@ -214,15 +214,12 @@ retry:
 
 static void cleanup_func(void *data)
 {
-   int running, holders;
+   int holders;
struct tur_checker_context *ct = data;
 
-   running = uatomic_xchg(&ct->running, 0);
   holders = uatomic_sub_return(&ct->holders, 1);
if (!holders)
cleanup_context(ct);
-   if (!running)
-   pause();
 }
 
 static int tur_running(struct tur_checker_context *ct)
@@ -242,7 +239,7 @@ static void copy_msg_to_tcc(void *ct_p, const char *msg)
 static void *tur_thread(void *ctx)
 {
struct tur_checker_context *ct = ctx;
-   int state;
+   int state, running;
char devt[32];
 
condlog(3, "%s: tur checker starting up",
@@ -268,6 +265,11 @@ static void *tur_thread(void *ctx)
 
condlog(3, "%s: tur checker finished, state %s",
tur_devt(devt, sizeof(devt), ct), checker_state_name(state));
+
+   running = uatomic_xchg(&ct->running, 0);
+   if (!running)
+   pause();
+
tur_thread_cleanup_pop(ct);
 
return ((void *)0);


> 
> -- 
> Dr. Martin Wilck , Tel. +49 (0)911 74053 2107
> SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton
> HRB 21284 (AG Nürnberg)

> From afb9c7de3658d49c4f28f6b9ee618a87b806ecdd Mon Sep 17 00:00:00 2001
> From: Martin Wilck 
> Date: Sat, 10 Feb 2018 00:22:17 +0100
> Subject: [PATCH] tur checker: make sure pthread_cancel isn't called for exited
>  thread
> 
> If we enter the cleanup function as the result of a pthread_cancel by another
> thread, we don't need to wait for a cancellation any more. If we exit
> regularly, just tell the other thread not to try to cancel us.
> ---
>  libmultipath/checkers/tur.c | 9 +
>  1 file changed, 5 insertions(+), 4 deletions(-)
> 
> diff --git a/libmultipath/checkers/tur.c b/libmultipath/checkers/tur.c
> index 894ad41c89c3..5d2b36bfa883 100644
> --- a/libmultipath/checkers/tur.c
> +++ b/libmultipath/checkers/tur.c
> @@ -214,15 +214,13 @@ retry:
>  
>  static void cleanup_func(void *data)
>  {
> - int running, holders;
> + int holders;
>   struct tur_checker_context *ct = data;
>  
> - running = uatomic_xchg(&ct->running, 0);
> + uatomic_set(&ct->running, 0);
>   holders = uatomic_sub_return(&ct->holders, 1);
>   if (!holders)
>   cleanup_context(ct);
> - if (!running)
> - pause();
>  }
>  
>  static int tur_running(struct tur_checker_context *ct)
> @@ -266,6 +264,9 @@ static void *tur_thread(void *ctx)
>   pthread_cond_signal(&ct->active);
>   pthread_mutex_unlock(&ct->lock);
>  
> + /* Tell main checker thread not to cancel us, as we exit anyway */
> + uatomic_set(&ct->running, 0);
> +
>   condlog(3, "%s: tur checker finished, state %s",
>   tur_devt(devt, sizeof(devt), ct), checker_state_name(state));
>   tur_thread_cleanup_pop(ct);
> -- 
> 2.16.1
> 

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel


Re: [dm-devel] [PATCH v2 1/7] libmultipath: fix tur checker locking

2018-02-09 Thread Benjamin Marzinski
On Sat, Feb 10, 2018 at 12:36:05AM +0100, Martin Wilck wrote:
> On Sat, 2018-02-10 at 00:28 +0100, Martin Wilck wrote:
> > Maybe it's easier than we thought. Attached is a patch on top of
> > yours that I think might work, please have a look. 
> > 
> 
> That one didn't even compile. This one is better.
> 
> Martin

So if we have this ordering

- checker calls uatomic_xchg() which returns 1 and then gets scheduled
- thread calls uatomic_set() and then runs till it terminates
- checker calls pthread_cancel()

You will get Bart's original bug.  I realize that having the condlog()
after the uatomic_set() in the thread makes this unlikely, but I don't
like races like this. I would be happier with simply taking the original code
and moving the condlog(), if neither of my other two options are
acceptable.

-Ben

> 
> -- 
> Dr. Martin Wilck , Tel. +49 (0)911 74053 2107
> SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton
> HRB 21284 (AG Nürnberg)

> From afb9c7de3658d49c4f28f6b9ee618a87b806ecdd Mon Sep 17 00:00:00 2001
> From: Martin Wilck 
> Date: Sat, 10 Feb 2018 00:22:17 +0100
> Subject: [PATCH] tur checker: make sure pthread_cancel isn't called for exited
>  thread
> 
> If we enter the cleanup function as the result of a pthread_cancel by another
> thread, we don't need to wait for a cancellation any more. If we exit
> regularly, just tell the other thread not to try to cancel us.
> ---
>  libmultipath/checkers/tur.c | 9 +
>  1 file changed, 5 insertions(+), 4 deletions(-)
> 
> diff --git a/libmultipath/checkers/tur.c b/libmultipath/checkers/tur.c
> index 894ad41c89c3..5d2b36bfa883 100644
> --- a/libmultipath/checkers/tur.c
> +++ b/libmultipath/checkers/tur.c
> @@ -214,15 +214,13 @@ retry:
>  
>  static void cleanup_func(void *data)
>  {
> - int running, holders;
> + int holders;
>   struct tur_checker_context *ct = data;
>  
> - running = uatomic_xchg(&ct->running, 0);
> + uatomic_set(&ct->running, 0);
>   holders = uatomic_sub_return(&ct->holders, 1);
>   if (!holders)
>   cleanup_context(ct);
> - if (!running)
> - pause();
>  }
>  
>  static int tur_running(struct tur_checker_context *ct)
> @@ -266,6 +264,9 @@ static void *tur_thread(void *ctx)
>   pthread_cond_signal(&ct->active);
>   pthread_mutex_unlock(&ct->lock);
>  
> + /* Tell main checker thread not to cancel us, as we exit anyway */
> + uatomic_set(&ct->running, 0);
> +
>   condlog(3, "%s: tur checker finished, state %s",
>   tur_devt(devt, sizeof(devt), ct), checker_state_name(state));
>   tur_thread_cleanup_pop(ct);
> -- 
> 2.16.1
> 

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel


Re: [dm-devel] [PATCH v2 1/7] libmultipath: fix tur checker locking

2018-02-09 Thread Martin Wilck
On Fri, 2018-02-09 at 17:04 -0600, Benjamin Marzinski wrote:
> On Fri, Feb 09, 2018 at 09:30:56PM +0100, Martin Wilck wrote:
> > On Thu, 2018-02-08 at 17:56 -0600, Benjamin Marzinski wrote:
> > > ct->running is now an atomic variable.  When the thread is
> > > started
> > > it is set to 1. When the checker wants to kill a thread, it
> > > atomically
> > > sets the value to 0 and reads the previous value.  If it was 1,
> > > the checker cancels the thread. If it was 0, then nothing needs to
> > > be
> > > done.  After the checker has dealt with the thread, it sets
> > > ct->thread to NULL.
> > > 
> > > When the thread is done, it atomically sets the value of
> > > ct->running to 0 and reads the previous value. If it was 1, the thread just
> > > exits.
> > > If it was 0, then the checker is trying to cancel the thread, and
> > > so
> > > the thread calls pause(), which is a cancellation point.
> > > 
> > 
> > I'm missing one thing here. My poor brain is aching.
> > 
> > cleanup_func() can be entered in two ways: a) if the thread has
> > been
> > cancelled and passed a cancellation point already, or b) if it
> > exits
> > normally and calls pthread_cleanup_pop(). 
> > In case b), waiting for the cancellation request by calling pause()
> > makes sense to me. But in case a), the thread has already seen the
> > cancellation request - wouldn't calling pause() cause it to sleep
> > forever?
> 
> Urgh. You're right. If it is in the cleanup helper because it already
> has been cancelled, then the pause isn't going to get cancelled. So much
> for my quick rewrite.

Maybe it's easier than we thought. Attached is a patch on top of yours
that I think might work, please have a look. 

It's quite late here, so I'll need to ponder your alternatives below
the other day.

Cheers
Martin

> 
> That leaves three options.
> 
> 1. have either the thread or the checker detach the thread (depending
>on which one exits first)
> 2. make the checker always cancel and detach the thread. This
> simplifies
>the code, but there will zombie threads hanging around between
> calls
>to the checker.
> 3. just move the condlog
> 
> I really don't care which one we pick anymore.
> 
> -Ben
> 
> > 
> > Martin
> > 
> > -- 
> > Dr. Martin Wilck , Tel. +49 (0)911 74053 2107
> > SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham
> > Norton
> > HRB 21284 (AG Nürnberg)
> 
> 

-- 
Dr. Martin Wilck , Tel. +49 (0)911 74053 2107
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
From 831ef27b41858fa248201b74f2dd8ea5b7c4aece Mon Sep 17 00:00:00 2001
From: Martin Wilck 
Date: Sat, 10 Feb 2018 00:22:17 +0100
Subject: [PATCH] tur checker: make sure pthread_cancel isn't called for exited
 thread

If we enter the cleanup function as the result of a pthread_cancel by another
thread, we don't need to wait for a cancellation any more. If we exit
regularly, just tell the other thread not to try to cancel us.
---
 libmultipath/checkers/tur.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/libmultipath/checkers/tur.c b/libmultipath/checkers/tur.c
index 894ad41c89c3..31a87d2b5cf2 100644
--- a/libmultipath/checkers/tur.c
+++ b/libmultipath/checkers/tur.c
@@ -221,8 +221,6 @@ static void cleanup_func(void *data)
 	holders = uatomic_sub_return(&ct->holders, 1);
 	if (!holders)
 		cleanup_context(ct);
-	if (!running)
-		pause();
 }
 
 static int tur_running(struct tur_checker_context *ct)
@@ -266,6 +264,9 @@ static void *tur_thread(void *ctx)
 	pthread_cond_signal(&ct->active);
 	pthread_mutex_unlock(&ct->lock);
 
+	/* Tell main checker thread not to cancel us, as we exit anyway */
+	running = uatomic_xchg(&ct->running, 0);
+
 	condlog(3, "%s: tur checker finished, state %s",
 		tur_devt(devt, sizeof(devt), ct), checker_state_name(state));
 	tur_thread_cleanup_pop(ct);
-- 
2.16.1

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

Re: [dm-devel] [PATCH v2 1/7] libmultipath: fix tur checker locking

2018-02-09 Thread Martin Wilck
On Sat, 2018-02-10 at 00:28 +0100, Martin Wilck wrote:
> Maybe it's easier than we thought. Attached is a patch on top of
> yours that I think might work, please have a look. 
> 

That one didn't even compile. This one is better.

Martin

-- 
Dr. Martin Wilck , Tel. +49 (0)911 74053 2107
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
From afb9c7de3658d49c4f28f6b9ee618a87b806ecdd Mon Sep 17 00:00:00 2001
From: Martin Wilck 
Date: Sat, 10 Feb 2018 00:22:17 +0100
Subject: [PATCH] tur checker: make sure pthread_cancel isn't called for exited
 thread

If we enter the cleanup function as the result of a pthread_cancel by another
thread, we don't need to wait for a cancellation any more. If we exit
regularly, just tell the other thread not to try to cancel us.
---
 libmultipath/checkers/tur.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/libmultipath/checkers/tur.c b/libmultipath/checkers/tur.c
index 894ad41c89c3..5d2b36bfa883 100644
--- a/libmultipath/checkers/tur.c
+++ b/libmultipath/checkers/tur.c
@@ -214,15 +214,13 @@ retry:
 
 static void cleanup_func(void *data)
 {
-	int running, holders;
+	int holders;
 	struct tur_checker_context *ct = data;
 
-	running = uatomic_xchg(&ct->running, 0);
+	uatomic_set(&ct->running, 0);
 	holders = uatomic_sub_return(&ct->holders, 1);
 	if (!holders)
 		cleanup_context(ct);
-	if (!running)
-		pause();
 }
 
 static int tur_running(struct tur_checker_context *ct)
@@ -266,6 +264,9 @@ static void *tur_thread(void *ctx)
 	pthread_cond_signal(&ct->active);
 	pthread_mutex_unlock(&ct->lock);
 
+	/* Tell main checker thread not to cancel us, as we exit anyway */
+	uatomic_set(&ct->running, 0);
+
 	condlog(3, "%s: tur checker finished, state %s",
 		tur_devt(devt, sizeof(devt), ct), checker_state_name(state));
 	tur_thread_cleanup_pop(ct);
-- 
2.16.1

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

Re: [dm-devel] [PATCH v2 1/7] libmultipath: fix tur checker locking

2018-02-09 Thread Benjamin Marzinski
On Fri, Feb 09, 2018 at 09:30:56PM +0100, Martin Wilck wrote:
> On Thu, 2018-02-08 at 17:56 -0600, Benjamin Marzinski wrote:
> > ct->running is now an atomic variable.  When the thread is started
> > it is set to 1. When the checker wants to kill a thread, it
> > atomically
> > sets the value to 0 and reads the previous value.  If it was 1,
> > the checker cancels the thread. If it was 0, then nothing needs to be
> > done.  After the checker has dealt with the thread, it sets ct-
> > >thread
> > to NULL.
> > 
> > When the thread is done, it atomically sets the value of ct-
> > >running
> > to 0 and reads the previous value. If it was 1, the thread just
> > exits.
> > If it was 0, then the checker is trying to cancel the thread, and so
> > the thread calls pause(), which is a cancellation point.
> > 
> 
> I'm missing one thing here. My poor brain is aching.
> 
> cleanup_func() can be entered in two ways: a) if the thread has been
> cancelled and passed a cancellation point already, or b) if it exits
> normally and calls pthread_cleanup_pop(). 
> In case b), waiting for the cancellation request by calling pause()
> makes sense to me. But in case a), the thread has already seen the
> cancellation request - wouldn't calling pause() cause it to sleep
> forever?

Urgh. You're right. If it is in the cleanup helper because it already
has been cancelled, then the pause isn't going to get cancelled. So much
for my quick rewrite.

That leaves three options.

1. have either the thread or the checker detach the thread (depending
   on which one exits first)
2. make the checker always cancel and detach the thread. This simplifies
   the code, but there will be zombie threads hanging around between calls
   to the checker.
3. just move the condlog

I really don't care which one we pick anymore.

-Ben

> 
> Martin
> 
> -- 
> Dr. Martin Wilck , Tel. +49 (0)911 74053 2107
> SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton
> HRB 21284 (AG Nürnberg)

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel


Re: [dm-devel] [PATCH v2 1/7] libmultipath: fix tur checker locking

2018-02-09 Thread Martin Wilck
On Thu, 2018-02-08 at 17:56 -0600, Benjamin Marzinski wrote:
> ct->running is now an atomic variable.  When the thread is started
> it is set to 1. When the checker wants to kill a thread, it
> atomically
> sets the value to 0 and reads the previous value.  If it was 1,
> the checker cancels the thread. If it was 0, then nothing needs to be
> done.  After the checker has dealt with the thread, it sets ct-
> >thread
> to NULL.
> 
> When the thread is done, it atomically sets the value of ct-
> >running
> to 0 and reads the previous value. If it was 1, the thread just
> exits.
> If it was 0, then the checker is trying to cancel the thread, and so
> the thread calls pause(), which is a cancellation point.
> 

I'm missing one thing here. My poor brain is aching.

cleanup_func() can be entered in two ways: a) if the thread has been
cancelled and passed a cancellation point already, or b) if it exits
normally and calls pthread_cleanup_pop(). 
In case b), waiting for the cancellation request by calling pause()
makes sense to me. But in case a), the thread has already seen the
cancellation request - wouldn't calling pause() cause it to sleep
forever?

Martin

-- 
Dr. Martin Wilck , Tel. +49 (0)911 74053 2107
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

Re: [dm-devel] [PATCH v2 1/7] libmultipath: fix tur checker locking

2018-02-09 Thread Bart Van Assche
On Fri, 2018-02-09 at 11:26 -0600, Benjamin Marzinski wrote:
> On Fri, Feb 09, 2018 at 04:15:34PM +, Bart Van Assche wrote:
> > On Thu, 2018-02-08 at 17:56 -0600, Benjamin Marzinski wrote:
> > >  static void cleanup_func(void *data)
> > >  {
> > > - int holders;
> > > + int running, holders;
> > >   struct tur_checker_context *ct = data;
> > > - pthread_spin_lock(&ct->hldr_lock);
> > > - ct->holders--;
> > > - holders = ct->holders;
> > > - ct->thread = 0;
> > > - pthread_spin_unlock(&ct->hldr_lock);
> > > +
> > > + running = uatomic_xchg(&ct->running, 0);
> > > + holders = uatomic_sub_return(&ct->holders, 1);
> > >   if (!holders)
> > >   cleanup_context(ct);
> > > + if (!running)
> > > + pause();
> > >  }
> > 
> > Hello Ben,
> > 
> > Why has the pause() call been added? I think it is safe to call 
> > pthread_cancel()
> > for a non-detached thread that has finished so I don't think that pause() 
> > call
> > is necessary.
> 
> Martin objected to having the threads getting detached as part of
> cancelling them (I think. I'm a little fuzzy on what he didn't like).
> But he definitely said he preferred the thread to start detached, so in
> this version, it does.  That's why we need the pause().  If he's fine with
> the threads getting detached later, I will happily replace the pause()
> with
> 
> if (running)
>   pthread_detach(pthread_self());
> 
> and add pthread_detach(ct->thread) after the calls to
> pthread_cancel(ct->thread). Otherwise we need the pause() to solve your
> original bug.

Ah, thanks, I had overlooked that the tur checker detaches the checker thread. 
Have
you considered to add a comment above the pause() call that explains the 
purpose of
that call?

Thanks,

Bart.




--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel


Re: [dm-devel] [PATCH v2 1/7] libmultipath: fix tur checker locking

2018-02-09 Thread Benjamin Marzinski
On Fri, Feb 09, 2018 at 04:15:34PM +, Bart Van Assche wrote:
> On Thu, 2018-02-08 at 17:56 -0600, Benjamin Marzinski wrote:
> >  static void cleanup_func(void *data)
> >  {
> > -   int holders;
> > +   int running, holders;
> > struct tur_checker_context *ct = data;
> > -   pthread_spin_lock(&ct->hldr_lock);
> > -   ct->holders--;
> > -   holders = ct->holders;
> > -   ct->thread = 0;
> > -   pthread_spin_unlock(&ct->hldr_lock);
> > +
> > +   running = uatomic_xchg(&ct->running, 0);
> > +   holders = uatomic_sub_return(&ct->holders, 1);
> > if (!holders)
> > cleanup_context(ct);
> > +   if (!running)
> > +   pause();
> >  }
> 
> Hello Ben,
> 
> Why has the pause() call been added? I think it is safe to call 
> pthread_cancel()
> for a non-detached thread that has finished so I don't think that pause() call
> is necessary.

Martin objected to having the threads getting detached as part of
cancelling them (I think. I'm a little fuzzy on what he didn't like).
But he definitely said he preferred the thread to start detached, so in
this version, it does.  That's why we need the pause().  If he's fine with
the threads getting detached later, I will happily replace the pause()
with

if (running)
pthread_detach(pthread_self());

and add pthread_detach(ct->thread) after the calls to
pthread_cancel(ct->thread). Otherwise we need the pause() to solve your
original bug.

As an aside, Martin, if your problem is the thread detaching itself, we
can skip that if we are fine with a zombie thread hanging around until
the next time we call libcheck_check() or libcheck_free(). Then the
checker can always be in charge of detaching the thread.
 
> >  static int tur_running(struct tur_checker_context *ct)
> >  {
> > -   pthread_t thread;
> > -
> > -   pthread_spin_lock(&ct->hldr_lock);
> > -   thread = ct->thread;
> > -   pthread_spin_unlock(&ct->hldr_lock);
> > -
> > -   return thread != 0;
> > +   return (uatomic_read(&ct->running) != 0);
> >  }
> 
> Is such a one line function really useful?

Nope. I just left it there to keep the number of changes that the patch
makes lower, to make it more straightforward to review. I'm fine will
inlining it.

> I think the code will be easier to read if this function is inlined
> into its callers.

> > @@ -418,8 +396,12 @@ int libcheck_check(struct checker * c)
> > (tur_status == PATH_PENDING || tur_status == 
> > PATH_UNCHECKED)) {
> > condlog(3, "%s: tur checker still running",
> > tur_devt(devt, sizeof(devt), ct));
> > -   ct->running = 1;
> > tur_status = PATH_PENDING;
> > +   } else {
> > +   int running = uatomic_xchg(&ct->running, 0);
> > +   if (running)
> > +   pthread_cancel(ct->thread);
> > +   ct->thread = 0;
> > }
> > }
> 
> Why has this pthread_cancel() call been added? I think that else clause can 
> only be
> reached if ct->running == 0 so I don't think that the pthread_cancel() call 
> will ever
> be reached.

It can be reached if ct->running is 1, as long as tur_status has been
updated.  In practice this means that the thread should have done
everything it needs to do, and all that's left is for it to shutdown.
However, if the thread doesn't shut itself down before the next time you
call libcheck_check(), the checker will give up and return PATH_TIMEOUT.

It seems pretty unlikely that this will happen, since there should be a
significant delay before calling libcheck_check() again. This
theoretical race has been in the code for a while, and AFAIK, it's never
occurred. But there definitely is the possibility that the thread will
still be running at the end of libcheck_check(), and it doesn't hurt
things to forceably shut the thread down, if it is. 

-Ben

> Thanks,
> 
> Bart.
> 
> 
> 

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel


Re: [dm-devel] [PATCH v2 1/7] libmultipath: fix tur checker locking

2018-02-09 Thread Bart Van Assche
On Thu, 2018-02-08 at 17:56 -0600, Benjamin Marzinski wrote:
>  static void cleanup_func(void *data)
>  {
> - int holders;
> + int running, holders;
>   struct tur_checker_context *ct = data;
> - pthread_spin_lock(&ct->hldr_lock);
> - ct->holders--;
> - holders = ct->holders;
> - ct->thread = 0;
> - pthread_spin_unlock(&ct->hldr_lock);
> +
> + running = uatomic_xchg(&ct->running, 0);
> + holders = uatomic_sub_return(&ct->holders, 1);
>   if (!holders)
>   cleanup_context(ct);
> + if (!running)
> + pause();
>  }

Hello Ben,

Why has the pause() call been added? I think it is safe to call pthread_cancel()
for a non-detached thread that has finished so I don't think that pause() call
is necessary.
 
>  static int tur_running(struct tur_checker_context *ct)
>  {
> - pthread_t thread;
> -
> - pthread_spin_lock(&ct->hldr_lock);
> - thread = ct->thread;
> - pthread_spin_unlock(&ct->hldr_lock);
> -
> - return thread != 0;
> + return (uatomic_read(&ct->running) != 0);
>  }

Is such a one line function really useful? I think the code will be easier to 
read
if this function is inlined into its callers.

> @@ -418,8 +396,12 @@ int libcheck_check(struct checker * c)
>   (tur_status == PATH_PENDING || tur_status == 
> PATH_UNCHECKED)) {
>   condlog(3, "%s: tur checker still running",
>   tur_devt(devt, sizeof(devt), ct));
> - ct->running = 1;
>   tur_status = PATH_PENDING;
> + } else {
> + int running = uatomic_xchg(&ct->running, 0);
> + if (running)
> + pthread_cancel(ct->thread);
> + ct->thread = 0;
>   }
>   }

Why has this pthread_cancel() call been added? I think that else clause can 
only be
reached if ct->running == 0 so I don't think that the pthread_cancel() call 
will ever
be reached.

Thanks,

Bart.




--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel