On Fri, Apr 03, 2026 at 09:23:56PM -0600, Uday Shankar wrote: > Before the fix, teardown of a ublk server that was attempting to recover > a device, but died when it had submitted a nonempty proper subset of the > fetch commands to any queue would loop forever. Add a test to verify > that, after the fix, teardown completes. This is done by: > > - Adding a new argument to the fault_inject target that causes it die > after fetching a nonempty proper subset of the IOs to a queue > - Using that argument in a new test while trying to recover an > already-created device > - Attempting to delete the ublk device at the end of the test; this > hangs forever if teardown from the fault-injected ublk server never > completed. > > It was manually verified that the test passes with the fix and hangs > without it. > > Signed-off-by: Uday Shankar <[email protected]> > --- > tools/testing/selftests/ublk/Makefile | 1 + > tools/testing/selftests/ublk/fault_inject.c | 51 > +++++++++++++++++++++++-- > tools/testing/selftests/ublk/kublk.c | 4 ++ > tools/testing/selftests/ublk/kublk.h | 3 ++ > tools/testing/selftests/ublk/test_generic_17.sh | 35 +++++++++++++++++ > 5 files changed, 91 insertions(+), 3 deletions(-) > > diff --git a/tools/testing/selftests/ublk/Makefile > b/tools/testing/selftests/ublk/Makefile > index > 8ac2d4a682a1768fb1eb9d2dd2a5d01294a67a03..d338668c5a5fbd73f6d70165455a3551ab13e894 > 100644 > --- a/tools/testing/selftests/ublk/Makefile > +++ b/tools/testing/selftests/ublk/Makefile > @@ -18,6 +18,7 @@ TEST_PROGS += test_generic_10.sh > TEST_PROGS += test_generic_12.sh > TEST_PROGS += test_generic_13.sh > TEST_PROGS += test_generic_16.sh > +TEST_PROGS += test_generic_17.sh > > TEST_PROGS += test_batch_01.sh > TEST_PROGS += test_batch_02.sh > diff --git a/tools/testing/selftests/ublk/fault_inject.c > b/tools/testing/selftests/ublk/fault_inject.c > index > 3b897f69c014cc73b4b469d816e80284dd21b577..228a9605053409c84baaf255f97c4abc271a8bfd > 100644 > --- 
a/tools/testing/selftests/ublk/fault_inject.c > +++ b/tools/testing/selftests/ublk/fault_inject.c > @@ -10,11 +10,17 @@ > > #include "kublk.h" > > +struct fi_opts { > + long long delay_ns; > + bool die_during_fetch; > +}; > + > static int ublk_fault_inject_tgt_init(const struct dev_ctx *ctx, > struct ublk_dev *dev) > { > const struct ublksrv_ctrl_dev_info *info = &dev->dev_info; > unsigned long dev_size = 250UL << 30; > + struct fi_opts *opts = NULL; > > if (ctx->auto_zc_fallback) { > ublk_err("%s: not support auto_zc_fallback\n", __func__); > @@ -35,17 +41,51 @@ static int ublk_fault_inject_tgt_init(const struct > dev_ctx *ctx, > }; > ublk_set_integrity_params(ctx, &dev->tgt.params); > > - dev->private_data = (void *)(unsigned long)(ctx->fault_inject.delay_us > * 1000); > + opts = calloc(1, sizeof(*opts)); > + if (!opts) { > + ublk_err("%s: couldn't allocate memory for opts\n", __func__); > + return -ENOMEM; > + } > + > + opts->delay_ns = ctx->fault_inject.delay_us * 1000; > + opts->die_during_fetch = ctx->fault_inject.die_during_fetch; > + dev->private_data = opts; > + > return 0; > } > > +static void ublk_fault_inject_pre_fetch_io(struct ublk_thread *t, > + struct ublk_queue *q, int tag) > +{ > + struct fi_opts *opts = q->dev->private_data; > + > + if (!opts->die_during_fetch) > + return; > + > + /* > + * Each queue fetches its IOs in increasing order of tags, so > + * dying just before we're about to fetch tag 1 (regardless of > + * what queue we're on) guarantees that we've fetched a nonempty > + * proper subset of the tags on that queue. > + */ > + if (tag == 1) { > + /* > + * Ensure our commands are actually live in the kernel > + * before we die. 
> + */ > + io_uring_submit(&t->ring); > + raise(SIGKILL); > + } > +} > + > static int ublk_fault_inject_queue_io(struct ublk_thread *t, > struct ublk_queue *q, int tag) > { > const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); > struct io_uring_sqe *sqe; > + struct fi_opts *opts = q->dev->private_data; > struct __kernel_timespec ts = { > - .tv_nsec = (long long)q->dev->private_data, > + .tv_nsec = opts->delay_ns, > }; > > ublk_io_alloc_sqes(t, &sqe, 1); > @@ -77,29 +117,34 @@ static void ublk_fault_inject_cmd_line(struct dev_ctx > *ctx, int argc, char *argv > { > static const struct option longopts[] = { > { "delay_us", 1, NULL, 0 }, > + { "die_during_fetch", 1, NULL, 0 }, > { 0, 0, 0, 0 } > }; > int option_idx, opt; > > ctx->fault_inject.delay_us = 0; > + ctx->fault_inject.die_during_fetch = false; > while ((opt = getopt_long(argc, argv, "", > longopts, &option_idx)) != -1) { > switch (opt) { > case 0: > if (!strcmp(longopts[option_idx].name, "delay_us")) > ctx->fault_inject.delay_us = strtoll(optarg, > NULL, 10); > + if (!strcmp(longopts[option_idx].name, > "die_during_fetch")) > + ctx->fault_inject.die_during_fetch = > strtoll(optarg, NULL, 10); > } > } > } > > static void ublk_fault_inject_usage(const struct ublk_tgt_ops *ops) > { > - printf("\tfault_inject: [--delay_us us (default 0)]\n"); > + printf("\tfault_inject: [--delay_us us (default 0)] [--die_during_fetch > 1]\n"); > } > > const struct ublk_tgt_ops fault_inject_tgt_ops = { > .name = "fault_inject", > .init_tgt = ublk_fault_inject_tgt_init, > + .pre_fetch_io = ublk_fault_inject_pre_fetch_io, > .queue_io = ublk_fault_inject_queue_io, > .tgt_io_done = ublk_fault_inject_tgt_io_done, > .parse_cmd_line = ublk_fault_inject_cmd_line, > diff --git a/tools/testing/selftests/ublk/kublk.c > b/tools/testing/selftests/ublk/kublk.c > index > e1c3b3c55e565c8cad6b6fe9b9b764cd244818c0..8260c96a39c05584065f41a52f3d9050614454c6 > 100644 > --- a/tools/testing/selftests/ublk/kublk.c > +++ 
b/tools/testing/selftests/ublk/kublk.c > @@ -796,6 +796,8 @@ static void ublk_submit_fetch_commands(struct ublk_thread > *t) > q = &t->dev->q[q_id]; > io = &q->ios[tag]; > io->buf_index = j++; > + if (q->tgt_ops->pre_fetch_io) > + q->tgt_ops->pre_fetch_io(t, q, tag); > ublk_queue_io_cmd(t, io); > } > } else { > @@ -807,6 +809,8 @@ static void ublk_submit_fetch_commands(struct ublk_thread > *t) > for (i = 0; i < q->q_depth; i++) { > io = &q->ios[i]; > io->buf_index = i; > + if (q->tgt_ops->pre_fetch_io) > + q->tgt_ops->pre_fetch_io(t, q, i); > ublk_queue_io_cmd(t, io); > } > }
The new pre_fetch_io callback also needs to be called from ublk_batch_setup_queues(), so that the F_BATCH path is covered too. Otherwise, this patch looks good. Thanks, Ming

