Before the fix, teardown of a ublk server that was attempting to recover
a device, but died when it had submitted a nonempty proper subset of the
fetch commands to any queue would loop forever. Add a test to verify
that, after the fix, teardown completes. This is done by:

- Adding a new argument to the fault_inject target that causes it die
  after fetching a nonempty proper subset of the IOs to a queue
- Using that argument in a new test while trying to recover an
  already-created device
- Attempting to delete the ublk device at the end of the test; this
  hangs forever if teardown from the fault-injected ublk server never
  completed.

It was manually verified that the test passes with the fix and hangs
without it.

Signed-off-by: Uday Shankar <[email protected]>
---
 tools/testing/selftests/ublk/Makefile           |  1 +
 tools/testing/selftests/ublk/fault_inject.c     | 52 +++++++++++++++++++++++--
 tools/testing/selftests/ublk/kublk.c            |  7 ++++
 tools/testing/selftests/ublk/kublk.h            |  3 ++
 tools/testing/selftests/ublk/test_generic_17.sh | 35 +++++++++++++++++
 5 files changed, 95 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/ublk/Makefile 
b/tools/testing/selftests/ublk/Makefile
index 
8ac2d4a682a1768fb1eb9d2dd2a5d01294a67a03..d338668c5a5fbd73f6d70165455a3551ab13e894
 100644
--- a/tools/testing/selftests/ublk/Makefile
+++ b/tools/testing/selftests/ublk/Makefile
@@ -18,6 +18,7 @@ TEST_PROGS += test_generic_10.sh
 TEST_PROGS += test_generic_12.sh
 TEST_PROGS += test_generic_13.sh
 TEST_PROGS += test_generic_16.sh
+TEST_PROGS += test_generic_17.sh
 
 TEST_PROGS += test_batch_01.sh
 TEST_PROGS += test_batch_02.sh
diff --git a/tools/testing/selftests/ublk/fault_inject.c 
b/tools/testing/selftests/ublk/fault_inject.c
index 
3b897f69c014cc73b4b469d816e80284dd21b577..150896e02ff8b3747fdc45eb2a6df7b52e134485
 100644
--- a/tools/testing/selftests/ublk/fault_inject.c
+++ b/tools/testing/selftests/ublk/fault_inject.c
@@ -10,11 +10,17 @@
 
 #include "kublk.h"
 
+struct fi_opts {
+       long long delay_ns;
+       bool die_during_fetch;
+};
+
 static int ublk_fault_inject_tgt_init(const struct dev_ctx *ctx,
                                      struct ublk_dev *dev)
 {
        const struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
        unsigned long dev_size = 250UL << 30;
+       struct fi_opts *opts = NULL;
 
        if (ctx->auto_zc_fallback) {
                ublk_err("%s: not support auto_zc_fallback\n", __func__);
@@ -35,17 +41,52 @@ static int ublk_fault_inject_tgt_init(const struct dev_ctx 
*ctx,
        };
        ublk_set_integrity_params(ctx, &dev->tgt.params);
 
-       dev->private_data = (void *)(unsigned long)(ctx->fault_inject.delay_us 
* 1000);
+       opts = calloc(1, sizeof(*opts));
+       if (!opts) {
+               ublk_err("%s: couldn't allocate memory for opts\n", __func__);
+               return -ENOMEM;
+       }
+
+       opts->delay_ns = ctx->fault_inject.delay_us * 1000;
+       opts->die_during_fetch = ctx->fault_inject.die_during_fetch;
+       dev->private_data = opts;
+
        return 0;
 }
 
+static void ublk_fault_inject_pre_fetch_io(struct ublk_thread *t,
+                                          struct ublk_queue *q, int tag,
+                                          bool batch)
+{
+       struct fi_opts *opts = q->dev->private_data;
+
+       if (!opts->die_during_fetch)
+               return;
+
+       /*
+        * Each queue fetches its IOs in increasing order of tags, so
+        * dying just before we're about to fetch tag 1 (regardless of
+        * what queue we're on) guarantees that we've fetched a nonempty
+        * proper subset of the tags on that queue.
+        */
+       if (tag == 1) {
+               /*
+                * Ensure our commands are actually live in the kernel
+                * before we die.
+                */
+               io_uring_submit(&t->ring);
+               raise(SIGKILL);
+       }
+}
+
 static int ublk_fault_inject_queue_io(struct ublk_thread *t,
                                      struct ublk_queue *q, int tag)
 {
        const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
        struct io_uring_sqe *sqe;
+       struct fi_opts *opts = q->dev->private_data;
        struct __kernel_timespec ts = {
-               .tv_nsec = (long long)q->dev->private_data,
+               .tv_nsec = opts->delay_ns,
        };
 
        ublk_io_alloc_sqes(t, &sqe, 1);
@@ -77,29 +118,34 @@ static void ublk_fault_inject_cmd_line(struct dev_ctx 
*ctx, int argc, char *argv
 {
        static const struct option longopts[] = {
                { "delay_us",   1,      NULL,  0  },
+               { "die_during_fetch", 1, NULL, 0  },
                { 0, 0, 0, 0 }
        };
        int option_idx, opt;
 
        ctx->fault_inject.delay_us = 0;
+       ctx->fault_inject.die_during_fetch = false;
        while ((opt = getopt_long(argc, argv, "",
                                  longopts, &option_idx)) != -1) {
                switch (opt) {
                case 0:
                        if (!strcmp(longopts[option_idx].name, "delay_us"))
                                ctx->fault_inject.delay_us = strtoll(optarg, 
NULL, 10);
+                       if (!strcmp(longopts[option_idx].name, 
"die_during_fetch"))
+                               ctx->fault_inject.die_during_fetch = 
strtoll(optarg, NULL, 10);
                }
        }
 }
 
 static void ublk_fault_inject_usage(const struct ublk_tgt_ops *ops)
 {
-       printf("\tfault_inject: [--delay_us us (default 0)]\n");
+       printf("\tfault_inject: [--delay_us us (default 0)] [--die_during_fetch 
1]\n");
 }
 
 const struct ublk_tgt_ops fault_inject_tgt_ops = {
        .name = "fault_inject",
        .init_tgt = ublk_fault_inject_tgt_init,
+       .pre_fetch_io = ublk_fault_inject_pre_fetch_io,
        .queue_io = ublk_fault_inject_queue_io,
        .tgt_io_done = ublk_fault_inject_tgt_io_done,
        .parse_cmd_line = ublk_fault_inject_cmd_line,
diff --git a/tools/testing/selftests/ublk/kublk.c 
b/tools/testing/selftests/ublk/kublk.c
index 
e1c3b3c55e565c8cad6b6fe9b9b764cd244818c0..e5b787ba21754719247bb17e1874313f52c35ed1
 100644
--- a/tools/testing/selftests/ublk/kublk.c
+++ b/tools/testing/selftests/ublk/kublk.c
@@ -796,6 +796,8 @@ static void ublk_submit_fetch_commands(struct ublk_thread 
*t)
                        q = &t->dev->q[q_id];
                        io = &q->ios[tag];
                        io->buf_index = j++;
+                       if (q->tgt_ops->pre_fetch_io)
+                               q->tgt_ops->pre_fetch_io(t, q, tag, false);
                        ublk_queue_io_cmd(t, io);
                }
        } else {
@@ -807,6 +809,8 @@ static void ublk_submit_fetch_commands(struct ublk_thread 
*t)
                for (i = 0; i < q->q_depth; i++) {
                        io = &q->ios[i];
                        io->buf_index = i;
+                       if (q->tgt_ops->pre_fetch_io)
+                               q->tgt_ops->pre_fetch_io(t, q, i, false);
                        ublk_queue_io_cmd(t, io);
                }
        }
@@ -983,6 +987,9 @@ static void ublk_batch_setup_queues(struct ublk_thread *t)
                if (t->q_map[i] == 0)
                        continue;
 
+               if (q->tgt_ops->pre_fetch_io)
+                       q->tgt_ops->pre_fetch_io(t, q, 0, true);
+
                ret = ublk_batch_queue_prep_io_cmds(t, q);
                ublk_assert(ret >= 0);
        }
diff --git a/tools/testing/selftests/ublk/kublk.h 
b/tools/testing/selftests/ublk/kublk.h
index 
02f0c55d006b4c791fea4456687d0d8757be7be2..6d1762aa30dfc8e72ff92aa86c8faba12169f644
 100644
--- a/tools/testing/selftests/ublk/kublk.h
+++ b/tools/testing/selftests/ublk/kublk.h
@@ -60,6 +60,7 @@ struct stripe_ctx {
 struct fault_inject_ctx {
        /* fault_inject */
        unsigned long   delay_us;
+       bool die_during_fetch;
 };
 
 struct dev_ctx {
@@ -138,6 +139,8 @@ struct ublk_tgt_ops {
        int (*init_tgt)(const struct dev_ctx *ctx, struct ublk_dev *);
        void (*deinit_tgt)(struct ublk_dev *);
 
+       void (*pre_fetch_io)(struct ublk_thread *t, struct ublk_queue *q,
+                            int tag, bool batch);
        int (*queue_io)(struct ublk_thread *, struct ublk_queue *, int tag);
        void (*tgt_io_done)(struct ublk_thread *, struct ublk_queue *,
                            const struct io_uring_cqe *);
diff --git a/tools/testing/selftests/ublk/test_generic_17.sh 
b/tools/testing/selftests/ublk/test_generic_17.sh
new file mode 100755
index 
0000000000000000000000000000000000000000..2278b5fc9dba523bb1bf8411d22f4f95a28da905
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_generic_17.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+ERR_CODE=0
+
+_prep_test "fault_inject" "teardown after incomplete recovery"
+
+# First start and stop a ublk server with device configured for recovery
+dev_id=$(_add_ublk_dev -t fault_inject -r 1)
+_check_add_dev $TID $?
+state=$(__ublk_kill_daemon "${dev_id}" "QUIESCED")
+if [ "$state" != "QUIESCED" ]; then
+        echo "device isn't quiesced($state) after $action"
+        ERR_CODE=255
+fi
+
+# Then recover the device, but use --die_during_fetch to have the ublk
+# server die while a queue has some (but not all) I/Os fetched
+${UBLK_PROG} recover -n "${dev_id}" --foreground -t fault_inject 
--die_during_fetch 1
+RECOVER_RES=$?
+# 137 is the result when dying of SIGKILL
+if (( RECOVER_RES != 137 )); then
+        echo "recover command exited with unexpected code ${RECOVER_RES}!"
+        ERR_CODE=255
+fi
+
+# Clean up the device. This can only succeed once teardown of the above
+# exited ublk server completes. So if teardown never completes, we will
+# time out here
+_ublk_del_dev "${dev_id}"
+
+_cleanup_test "fault_inject"
+_show_result $TID $ERR_CODE

-- 
2.34.1


Reply via email to