Add a framework for an asynchronous path checker utilizing the
recently added runner code for thread handling.

This code has been derived from the TUR checker code by removing
all references to the actual sending of TUR ioctls.

Follow-up patches will convert the current TUR checker into an
instance of this async checker class.

Signed-off-by: Martin Wilck <[email protected]>
---
 libmultipath/Makefile        |   2 +-
 libmultipath/async_checker.c | 216 +++++++++++++++++++++++++++++++++++
 libmultipath/async_checker.h |  35 ++++++
 libmultipath/checkers.c      |   1 +
 libmultipath/checkers.h      |   2 +
 5 files changed, 255 insertions(+), 1 deletion(-)
 create mode 100644 libmultipath/async_checker.c
 create mode 100644 libmultipath/async_checker.h

diff --git a/libmultipath/Makefile b/libmultipath/Makefile
index 85767ab..d71a835 100644
--- a/libmultipath/Makefile
+++ b/libmultipath/Makefile
@@ -22,7 +22,7 @@ OBJS-O := devmapper.o hwtable.o blacklist.o dmparser.o \
        configure.o structs_vec.o sysfs.o \
        lock.o file.o wwids.o prioritizers/alua_rtpg.o prkey.o \
        io_err_stat.o dm-generic.o generic.o nvme-lib.o \
-       libsg.o valid.o
+       libsg.o valid.o async_checker.o
 
 OBJS := $(OBJS-O) $(OBJS-U)
 
diff --git a/libmultipath/async_checker.c b/libmultipath/async_checker.c
new file mode 100644
index 0000000..06159dd
--- /dev/null
+++ b/libmultipath/async_checker.c
@@ -0,0 +1,216 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (c) 2026 SUSE LLC
+
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#include <sys/stat.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include "async_checker.h"
+#include "checkers.h"
+#include "debug.h"
+#include "runner.h"
+
+#define MAX_NR_TIMEOUTS 1
+
+struct async_checker_context {
+       struct checker_context chkr;
+       int last_runner_state;
+       unsigned int nr_timeouts;
+       struct runner_context *rtx;
+       struct runner_data rdata;
+};
+
+#define rdata_size(acc) (sizeof(acc->rdata))
+
+int async_check_init(struct checker *c)
+{
+       struct async_checker_context *acc;
+       struct stat sb;
+
+       acc = calloc(1, sizeof(*acc));
+       if (!acc)
+               return -1;
+       acc->rdata.state = PATH_UNCHECKED;
+       acc->rdata.fd = -1;
+       if (fstat(c->fd, &sb) == 0)
+               acc->rdata.devt = sb.st_rdev;
+       acc->chkr.cls = c->cls;
+       c->context = acc;
+       return 0;
+}
+
+void async_check_free(struct checker *c)
+{
+       struct async_checker_context *acc = c->context;
+
+       if (!acc)
+               return;
+       c->context = NULL;
+       if (acc->rtx)
+               release_runner(acc->rtx);
+       free(acc);
+}
+
+static void runner_callback(void *arg)
+{
+       struct runner_data *rdata = arg;
+       int state;
+
+       condlog(4, "%d:%d : async checker starting up", major(rdata->devt),
+               minor(rdata->devt));
+
+       async_deep_sleep(rdata);
+       state = rdata->afunc(rdata);
+       rdata->state = state;
+       pthread_testcancel();
+       condlog(4, "%d:%d : async checker finished, state %s", major(rdata->devt),
+               minor(rdata->devt), checker_state_name(state));
+}
+
+static int check_runner_state(struct async_checker_context *acc)
+{
+       struct runner_context *rtx = acc->rtx;
+       int rc;
+
+       rc = check_runner(rtx, &acc->rdata, rdata_size(acc));
+       switch (rc) {
+       case RUNNER_DEAD:
+               acc->rdata.state = PATH_TIMEOUT;
+               acc->rdata.msgid = CHECKER_MSGID_TIMEOUT;
+               /* fallthrough */
+       case RUNNER_DONE:
+               release_runner(acc->rtx);
+               acc->rtx = NULL;
+               acc->last_runner_state = rc;
+               acc->nr_timeouts = 0;
+               condlog(rc == RUNNER_DONE ? 4 : 3,
+                       "%d:%d : async checker finished, state %s, runner state %s",
+                       major(acc->rdata.devt), minor(acc->rdata.devt),
+                       checker_state_name(acc->rdata.state),
+                       runner_state_name(rc));
+               break;
+       case RUNNER_CANCELLED:
+               acc->last_runner_state = rc;
+               acc->rdata.state = PATH_TIMEOUT;
+               acc->rdata.msgid = CHECKER_MSGID_TIMEOUT;
+               if (acc->nr_timeouts < MAX_NR_TIMEOUTS) {
+                       condlog(3, "%d:%d : async checker timed out, releasing it",
+                               major(acc->rdata.devt), minor(acc->rdata.devt));
+                       acc->nr_timeouts++;
+                       release_runner(acc->rtx);
+                       acc->rtx = NULL;
+               } else if (acc->nr_timeouts == MAX_NR_TIMEOUTS) {
+                       acc->nr_timeouts++;
+                       condlog(3, "%d:%d : async checker timed out, waiting for it",
+                               major(acc->rdata.devt), minor(acc->rdata.devt));
+               }
+               break;
+       default:
+               condlog(4, "%d:%d : async checker still running",
+                       major(acc->rdata.devt), minor(acc->rdata.devt));
+               acc->rdata.msgid = CHECKER_MSGID_RUNNING;
+               break;
+       }
+       return rc;
+}
+
+bool async_check_need_wait(struct checker *c)
+{
+       struct async_checker_context *acc = c->context;
+
+       return acc && acc->rtx;
+}
+
+int async_check_pending(struct checker *c)
+{
+       struct async_checker_context *acc = c->context;
+       /* If the path checker isn't running, just return the existing value. */
+       if (!acc || !acc->rtx)
+               return c->path_state;
+
+       /* This may nullify acc->rtx */
+       check_runner_state(acc);
+       c->msgid = acc->rdata.msgid;
+       return acc->rdata.state;
+}
+
+/* Check a path: call async_func directly in sync mode; otherwise poll the
+ * running checker thread or start a new one. Returns a PATH_* state. */
+int async_check_check(struct checker *c)
+{
+       struct async_checker_context *acc = c->context;
+
+       if (!acc)
+               return PATH_UNCHECKED;
+
+       /* Handle the case that the checker just completed */
+       if (!checker_is_sync(c) && acc->rtx) {
+               check_runner_state(acc);
+               c->msgid = acc->rdata.msgid;
+               return acc->rdata.state;
+       }
+
+       /* afunc and its arguments must be set up in sync mode, too */
+       acc->rdata.fd = c->fd;
+       acc->rdata.timeout = c->timeout;
+       acc->rdata.afunc = c->cls->async_func;
+       if (checker_is_sync(c))
+               return acc->rdata.afunc(&acc->rdata);
+
+       /* create new checker thread */
+       acc->rdata.state = PATH_PENDING;
+       acc->rdata.msgid = CHECKER_MSGID_RUNNING;
+       condlog(4, "%d:%d : starting checker", major(acc->rdata.devt),
+               minor(acc->rdata.devt));
+       acc->rtx = get_runner(runner_callback, &acc->rdata, rdata_size(acc),
+                             1000000 * c->timeout);
+       if (acc->rtx) {
+               c->msgid = acc->rdata.msgid;
+               return acc->rdata.state;
+       }
+       condlog(3, "%d:%d : failed to start async thread, using sync mode",
+               major(acc->rdata.devt), minor(acc->rdata.devt));
+       return acc->rdata.afunc(&acc->rdata);
+}
+
+/*
+ * Test code for "zombie async checker thread" handling.
+ * Compile e.g. with CFLAGS=-DASYNC_TEST_MAJOR=8
+ * Additional parameters can be configured with the macros below.
+ *
+ * Every nth started thread will hang in non-cancellable state
+ * for given number of seconds, for device given by major/minor.
+ */
+#ifdef ASYNC_TEST_MAJOR
+#ifndef ASYNC_TEST_MINOR
+#define ASYNC_TEST_MINOR 0
+#endif
+#ifndef ASYNC_SLEEP_INTERVAL
+#define ASYNC_SLEEP_INTERVAL 3
+#endif
+#ifndef ASYNC_SLEEP_SECS
+#define ASYNC_SLEEP_SECS 60
+#endif
+
+static void async_deep_sleep(const struct runner_data *rdata)
+{
+       static int sleep_cnt;
+       const struct timespec ts = {.tv_sec = ASYNC_SLEEP_SECS, .tv_nsec = 0};
+       int oldstate;
+
+       /* Hang only every ASYNC_SLEEP_INTERVAL-th thread of the test device */
+       if (rdata->devt != makedev(ASYNC_TEST_MAJOR, ASYNC_TEST_MINOR) ||
+           ++sleep_cnt % ASYNC_SLEEP_INTERVAL != 0)
+               return;
+       condlog(3, "async thread going to sleep for %ld seconds", ts.tv_sec);
+       if (pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate) != 0)
+               condlog(0, "pthread_setcancelstate: %m");
+       if (nanosleep(&ts, NULL) != 0)
+               condlog(0, "nanosleep: %m");
+       condlog(3, "async zombie thread woke up");
+       if (pthread_setcancelstate(oldstate, NULL) != 0)
+               condlog(0, "pthread_setcancelstate (2): %m");
+       pthread_testcancel();
+}
+#endif /* ASYNC_TEST_MAJOR */
diff --git a/libmultipath/async_checker.h b/libmultipath/async_checker.h
new file mode 100644
index 0000000..835de2a
--- /dev/null
+++ b/libmultipath/async_checker.h
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (c) 2026 SUSE LLC
+#ifndef ASYNC_CHECKER_H_INCLUDED
+#define ASYNC_CHECKER_H_INCLUDED
+
+struct runner_data;
+struct checker;
+typedef int (*async_checker_func)(struct runner_data *);
+
+struct runner_data {
+       int fd;
+       dev_t devt;
+       async_checker_func afunc;
+       unsigned int timeout;
+       int state;
+       short msgid;
+       char checker_ctx[];
+};
+
+int async_check_init(struct checker *c);
+void async_check_free(struct checker *c);
+bool async_check_need_wait(struct checker *c);
+int async_check_pending(struct checker *c);
+int async_check_check(struct checker *c);
+
+#define CHECKER_MAX_CONTEXT_SIZE 1024
+
+/* For testing handling of async checker timeouts */
+#ifdef ASYNC_TEST_MAJOR
+static void async_deep_sleep(const struct runner_data *rdata);
+#else
+#define async_deep_sleep(x) do {} while (0)
+#endif
+
+#endif
diff --git a/libmultipath/checkers.c b/libmultipath/checkers.c
index a3b9cc8..3a1663f 100644
--- a/libmultipath/checkers.c
+++ b/libmultipath/checkers.c
@@ -9,6 +9,7 @@
 
 #include "debug.h"
 #include "checkers.h"
+#include "async_checker.h"
 #include "vector.h"
 #include "util.h"
 
diff --git a/libmultipath/checkers.h b/libmultipath/checkers.h
index 630e987..744be54 100644
--- a/libmultipath/checkers.h
+++ b/libmultipath/checkers.h
@@ -132,6 +132,7 @@ enum {
 };
 
 struct checker;
+struct runner_data;
 struct checker_class {
        struct list_head node;
        void *handle;
@@ -144,6 +145,7 @@ struct checker_class {
        void (*reset)(void);              /* to reset the global variables */
        int (*pending)(struct checker *); /* to recheck pending paths */
        bool (*need_wait)(struct checker *); /* checker needs waiting for */
+       int (*async_func)(struct runner_data *); /* callback for async_checker */
        const char **msgtable;
        short msgtable_size;
 };
-- 
2.54.0


Reply via email to