On Wed, Feb 25, 2026 at 03:36:10PM +0000, John Garry wrote:
> For failover handling, we must resubmit each bio.
>
> However, unlike NVMe, for SCSI there is no guarantee that any bio submitted
> is either all or none completed.
>
> As such, for SCSI, for failover handling we will take the approach to
> just re-submit the original bio. For this clone and submit each bio.
>
> Signed-off-by: John Garry <[email protected]>
> ---
> drivers/scsi/scsi_multipath.c | 51 ++++++++++++++++++++++++++++++++++-
> include/scsi/scsi_multipath.h | 1 +
> 2 files changed, 51 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/scsi/scsi_multipath.c b/drivers/scsi/scsi_multipath.c
> index 4b7984e7e74ba..d79a92ec0cf6c 100644
> --- a/drivers/scsi/scsi_multipath.c
> +++ b/drivers/scsi/scsi_multipath.c
> @@ -89,6 +89,14 @@ module_param_call(iopolicy, scsi_set_iopolicy,
> scsi_get_iopolicy,
> MODULE_PARM_DESC(iopolicy,
> "Default multipath I/O policy; 'numa' (default), 'round-robin' or
> 'queue-depth'");
>
> +struct scsi_mpath_clone_bio {
> + struct bio *master_bio;
> + struct bio clone;
> +};
If the only extra information you need for your clone bios is a pointer
to the original bio, I think you can just store that in bi_private. So
you shouldn't actually need to allocate any front pad for your bioset.
> +
> +#define scsi_mpath_to_master_bio(clone) \
> + container_of(clone, struct scsi_mpath_clone_bio, clone)
> +
> static int scsi_mpath_unique_lun_id(struct scsi_device *sdev)
> {
> struct scsi_mpath_device *scsi_mpath_dev = sdev->scsi_mpath_dev;
> @@ -260,6 +269,39 @@ static int scsi_multipath_sdev_init(struct scsi_device
> *sdev)
> return 0;
> }
>
> +static void scsi_mpath_clone_end_io(struct bio *clone)
> +{
> + struct scsi_mpath_clone_bio *scsi_mpath_clone_bio =
> + scsi_mpath_to_master_bio(clone);
> + struct bio *master_bio = scsi_mpath_clone_bio->master_bio;
> +
> + master_bio->bi_status = clone->bi_status;
> + bio_put(clone);
> + bio_endio(master_bio);
> +}
> +
> +static struct bio *scsi_mpath_clone_bio(struct bio *bio)
> +{
> + struct mpath_disk *mpath_disk = bio->bi_bdev->bd_disk->private_data;
> + struct mpath_head *mpath_head = mpath_disk->mpath_head;
> + struct scsi_mpath_clone_bio *scsi_mpath_clone_bio;
> + struct scsi_mpath_head *scsi_mpath_head = mpath_head->drvdata;
> + struct bio *clone;
> +
> + clone = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOWAIT,
> + &scsi_mpath_head->bio_pool);
Why use GFP_NOWAIT? It's more likely to fail than GFP_NOIO. If the bio
has REQ_NOWAIT set, I can see why you would need this, but otherwise,
I don't see why GFP_NOIO wouldn't be better here.
> + if (!clone)
> + return NULL;
> +
> + clone->bi_end_io = scsi_mpath_clone_end_io;
> +
> + scsi_mpath_clone_bio = container_of(clone,
> + struct scsi_mpath_clone_bio, clone);
> + scsi_mpath_clone_bio->master_bio = bio;
> +
> + return clone;
> +}
> +
> static enum mpath_iopolicy_e scsi_mpath_get_iopolicy(struct mpath_head
> *mpath_head)
> {
> struct scsi_mpath_head *scsi_mpath_head = mpath_head->drvdata;
> @@ -269,6 +311,7 @@ static enum mpath_iopolicy_e
> scsi_mpath_get_iopolicy(struct mpath_head *mpath_he
>
> struct mpath_head_template smpdt_pr = {
> .get_iopolicy = scsi_mpath_get_iopolicy,
> + .clone_bio = scsi_mpath_clone_bio,
> };
>
> static struct scsi_mpath_head *scsi_mpath_alloc_head(void)
> @@ -283,9 +326,13 @@ static struct scsi_mpath_head
> *scsi_mpath_alloc_head(void)
> ida_init(&scsi_mpath_head->ida);
> mutex_init(&scsi_mpath_head->lock);
>
> + if (bioset_init(&scsi_mpath_head->bio_pool, SCSI_MAX_QUEUE_DEPTH,
> + offsetof(struct scsi_mpath_clone_bio, clone),
> + BIOSET_NEED_BVECS|BIOSET_PERCPU_CACHE))
You don't need 4096 cached bios to guarantee forward progress. I don't
see why BIO_POOL_SIZE won't work fine here. Also, since you are cloning
bios, they are sharing the original bio's bio_vecs, so you don't need
BIOSET_NEED_BVECS.
-Ben