On 2024/05/20 12:20, Nitesh Shetty wrote:
> We add two new opcode REQ_OP_COPY_DST, REQ_OP_COPY_SRC.
> Since copy is a composite operation involving src and dst sectors/lba,
> each needs to be represented by a separate bio to make it compatible
> with device mapper.

Why ? Isn't the beginning of the sentence justification enough for the two new
operation codes ? The 2 sentences should be reversed for easier reading:
stating the justification first naturally leads the reader to understand why
the codes are needed.

Also: s/opcode/operations


> We expect caller to take a plug and send bio with destination information,
> followed by bio with source information.

expect ? Plugging is optional. Does copy offload require it ? Please clarify 
this.

> Once the dst bio arrives we form a request and wait for source

arrives ? You mean "is submitted" ?

s/and wait for/and wait for the

> bio. Upon arrival of source bio we merge these two bio's and send

s/arrival/submission ?

s/of/of the
s/bio's/BIOs
s/and send/and send the
s/down to/down to the

> corresponding request down to device driver.
> Merging non copy offload bio is avoided by checking for copy specific
> opcodes in merge function.

Super unclear... What are you trying to say here ? That merging copy offload
BIOs with other BIOs is not allowed ? That is already handled. Only BIOs &
requests with the same operation can be merged. The code below also suggests
that you allow merging copy offloads... So I really do not understand this.

> 
> Signed-off-by: Nitesh Shetty <nj.she...@samsung.com>
> Signed-off-by: Anuj Gupta <anuj2...@samsung.com>
> ---
>  block/blk-core.c          |  7 +++++++
>  block/blk-merge.c         | 41 +++++++++++++++++++++++++++++++++++++++
>  block/blk.h               | 16 +++++++++++++++
>  block/elevator.h          |  1 +
>  include/linux/bio.h       |  6 +-----
>  include/linux/blk_types.h | 10 ++++++++++
>  6 files changed, 76 insertions(+), 5 deletions(-)
> 
> diff --git a/block/blk-core.c b/block/blk-core.c
> index ea44b13af9af..f18ee5f709c0 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -122,6 +122,8 @@ static const char *const blk_op_name[] = {
>       REQ_OP_NAME(ZONE_FINISH),
>       REQ_OP_NAME(ZONE_APPEND),
>       REQ_OP_NAME(WRITE_ZEROES),
> +     REQ_OP_NAME(COPY_SRC),
> +     REQ_OP_NAME(COPY_DST),
>       REQ_OP_NAME(DRV_IN),
>       REQ_OP_NAME(DRV_OUT),
>  };
> @@ -838,6 +840,11 @@ void submit_bio_noacct(struct bio *bio)
>                * requests.
>                */
>               fallthrough;
> +     case REQ_OP_COPY_SRC:
> +     case REQ_OP_COPY_DST:
> +             if (!q->limits.max_copy_sectors)
> +                     goto not_supported;
> +             break;
>       default:
>               goto not_supported;
>       }
> diff --git a/block/blk-merge.c b/block/blk-merge.c
> index 8534c35e0497..f8dc48a03379 100644
> --- a/block/blk-merge.c
> +++ b/block/blk-merge.c
> @@ -154,6 +154,20 @@ static struct bio *bio_split_write_zeroes(struct bio 
> *bio,
>       return bio_split(bio, lim->max_write_zeroes_sectors, GFP_NOIO, bs);
>  }
>  
> +static struct bio *bio_split_copy(struct bio *bio,
> +                               const struct queue_limits *lim,
> +                               unsigned int *nsegs)
> +{
> +     *nsegs = 1;
> +     if (bio_sectors(bio) <= lim->max_copy_sectors)
> +             return NULL;
> +     /*
> +      * We don't support splitting for a copy bio. End it with EIO if
> +      * splitting is required and return an error pointer.
> +      */
> +     return ERR_PTR(-EIO);
> +}

Hmm... Why not check that the copy request is small enough and will not be split
when it is submitted ? Something like blk_check_zone_append() does with
REQ_OP_ZONE_APPEND ? So adding a blk_check_copy_offload(). That would also
include the limits check from the previous hunk.

> +
>  /*
>   * Return the maximum number of sectors from the start of a bio that may be
>   * submitted as a single request to a block device. If enough sectors remain,
> @@ -362,6 +376,12 @@ struct bio *__bio_split_to_limits(struct bio *bio,
>       case REQ_OP_WRITE_ZEROES:
>               split = bio_split_write_zeroes(bio, lim, nr_segs, bs);
>               break;
> +     case REQ_OP_COPY_SRC:
> +     case REQ_OP_COPY_DST:
> +             split = bio_split_copy(bio, lim, nr_segs);
> +             if (IS_ERR(split))
> +                     return NULL;
> +             break;

See above.

>       default:
>               split = bio_split_rw(bio, lim, nr_segs, bs,
>                               get_max_io_size(bio, lim) << SECTOR_SHIFT);
> @@ -925,6 +945,9 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
>       if (!rq_mergeable(rq) || !bio_mergeable(bio))
>               return false;
>  
> +     if (blk_copy_offload_mergable(rq, bio))
> +             return true;
> +
>       if (req_op(rq) != bio_op(bio))
>               return false;
>  
> @@ -958,6 +981,8 @@ enum elv_merge blk_try_merge(struct request *rq, struct 
> bio *bio)
>  {
>       if (blk_discard_mergable(rq))
>               return ELEVATOR_DISCARD_MERGE;
> +     else if (blk_copy_offload_mergable(rq, bio))
> +             return ELEVATOR_COPY_OFFLOAD_MERGE;
>       else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
>               return ELEVATOR_BACK_MERGE;
>       else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
> @@ -1065,6 +1090,20 @@ static enum bio_merge_status 
> bio_attempt_discard_merge(struct request_queue *q,
>       return BIO_MERGE_FAILED;
>  }
>  
> +static enum bio_merge_status bio_attempt_copy_offload_merge(struct request 
> *req,
> +                                                         struct bio *bio)
> +{
> +     if (req->__data_len != bio->bi_iter.bi_size)
> +             return BIO_MERGE_FAILED;
> +
> +     req->biotail->bi_next = bio;
> +     req->biotail = bio;
> +     req->nr_phys_segments++;
> +     req->__data_len += bio->bi_iter.bi_size;

Arg... You seem to be assuming that the source BIO always comes right after the
destination request... What if copy offloads are being concurrently issued ?
Shouldn't you check somehow that the pair is a match ? Or are you relying on the
per-context plugging which prevents that from happening in the first place ? But
that would assume that you never sleep trying to allocate the source BIO
after the destination BIO/request are prepared and plugged.

> +
> +     return BIO_MERGE_OK;
> +}
> +
>  static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
>                                                  struct request *rq,
>                                                  struct bio *bio,
> @@ -1085,6 +1124,8 @@ static enum bio_merge_status 
> blk_attempt_bio_merge(struct request_queue *q,
>               break;
>       case ELEVATOR_DISCARD_MERGE:
>               return bio_attempt_discard_merge(q, rq, bio);
> +     case ELEVATOR_COPY_OFFLOAD_MERGE:
> +             return bio_attempt_copy_offload_merge(rq, bio);
>       default:
>               return BIO_MERGE_NONE;
>       }
> diff --git a/block/blk.h b/block/blk.h
> index 189bc25beb50..6528a2779b84 100644
> --- a/block/blk.h
> +++ b/block/blk.h
> @@ -174,6 +174,20 @@ static inline bool blk_discard_mergable(struct request 
> *req)
>       return false;
>  }
>  
> +/*
> + * Copy offload sends a pair of bio with REQ_OP_COPY_DST and REQ_OP_COPY_SRC
> + * operation by taking a plug.
> + * Initially DST bio is sent which forms a request and
> + * waits for SRC bio to arrive. Once SRC bio arrives
> + * we merge it and send request down to driver.
> + */
> +static inline bool blk_copy_offload_mergable(struct request *req,
> +                                          struct bio *bio)
> +{
> +     return (req_op(req) == REQ_OP_COPY_DST &&
> +             bio_op(bio) == REQ_OP_COPY_SRC);
> +}

This function is really not needed at all (used in one place only).

> +
>  static inline unsigned int blk_rq_get_max_segments(struct request *rq)
>  {
>       if (req_op(rq) == REQ_OP_DISCARD)
> @@ -323,6 +337,8 @@ static inline bool bio_may_exceed_limits(struct bio *bio,
>       case REQ_OP_DISCARD:
>       case REQ_OP_SECURE_ERASE:
>       case REQ_OP_WRITE_ZEROES:
> +     case REQ_OP_COPY_SRC:
> +     case REQ_OP_COPY_DST:
>               return true; /* non-trivial splitting decisions */

See above. Limits should be checked on submission.

>       default:
>               break;
> diff --git a/block/elevator.h b/block/elevator.h
> index e9a050a96e53..c7a45c1f4156 100644
> --- a/block/elevator.h
> +++ b/block/elevator.h
> @@ -18,6 +18,7 @@ enum elv_merge {
>       ELEVATOR_FRONT_MERGE    = 1,
>       ELEVATOR_BACK_MERGE     = 2,
>       ELEVATOR_DISCARD_MERGE  = 3,
> +     ELEVATOR_COPY_OFFLOAD_MERGE     = 4,
>  };
>  
>  struct blk_mq_alloc_data;
> diff --git a/include/linux/bio.h b/include/linux/bio.h
> index d5379548d684..528ef22dd65b 100644
> --- a/include/linux/bio.h
> +++ b/include/linux/bio.h
> @@ -53,11 +53,7 @@ static inline unsigned int bio_max_segs(unsigned int 
> nr_segs)
>   */
>  static inline bool bio_has_data(struct bio *bio)
>  {
> -     if (bio &&
> -         bio->bi_iter.bi_size &&
> -         bio_op(bio) != REQ_OP_DISCARD &&
> -         bio_op(bio) != REQ_OP_SECURE_ERASE &&
> -         bio_op(bio) != REQ_OP_WRITE_ZEROES)
> +     if (bio && (bio_op(bio) == REQ_OP_READ || bio_op(bio) == REQ_OP_WRITE))
>               return true;

This change seems completely broken and out of place. This would cause a return
of false for zone append operations.

>  
>       return false;
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index 781c4500491b..7f692bade271 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -342,6 +342,10 @@ enum req_op {
>       /* reset all the zone present on the device */
>       REQ_OP_ZONE_RESET_ALL   = (__force blk_opf_t)15,
>  
> +     /* copy offload src and dst operation */

s/src/source
s/dst/destination
s/operation/operations

> +     REQ_OP_COPY_SRC         = (__force blk_opf_t)18,
> +     REQ_OP_COPY_DST         = (__force blk_opf_t)19,
> +
>       /* Driver private requests */
>       REQ_OP_DRV_IN           = (__force blk_opf_t)34,
>       REQ_OP_DRV_OUT          = (__force blk_opf_t)35,
> @@ -430,6 +434,12 @@ static inline bool op_is_write(blk_opf_t op)
>       return !!(op & (__force blk_opf_t)1);
>  }
>  
> +static inline bool op_is_copy(blk_opf_t op)
> +{
> +     return ((op & REQ_OP_MASK) == REQ_OP_COPY_SRC ||
> +             (op & REQ_OP_MASK) == REQ_OP_COPY_DST);
> +}

May be use a switch here to avoid the double masking of op ?

> +
>  /*
>   * Check if the bio or request is one that needs special treatment in the
>   * flush state machine.

-- 
Damien Le Moal
Western Digital Research


Reply via email to