On Sun, Oct 30, 2022 at 12:35:45AM -0400, Dmitry Fomichev wrote:
> This patch adds support for Zoned Block Devices (ZBDs) to the kernel
> virtio-blk driver.
> 
> The patch accompanies the virtio-blk ZBD support draft that is now
> being proposed for standardization. The latest version of the draft is
> linked at
> 
> https://github.com/oasis-tcs/virtio-spec/issues/143 .
> 
> The QEMU zoned device code that implements these protocol extensions
> has been developed by Sam Li and is currently under review on the QEMU
> mailing list.
> 
> A number of virtblk request structure changes have been introduced to
> accommodate the functionality that is specific to zoned block devices
> and, most importantly, to make room for carrying the Zoned Append
> sector value from the device back to the driver along with the request
> status.
> 
> The zone-specific code in the patch is heavily influenced by the NVMe
> ZNS code in drivers/nvme/host/zns.c, but it is simpler because the
> proposed virtio ZBD draft only covers the zoned device features that
> are relevant to the zoned functionality provided by the Linux block
> layer.
> 
> Co-developed-by: Stefan Hajnoczi <[email protected]>
> Signed-off-by: Stefan Hajnoczi <[email protected]>
> Signed-off-by: Dmitry Fomichev <[email protected]>
> ---
>  drivers/block/virtio_blk.c      | 438 ++++++++++++++++++++++++++++++--
>  include/uapi/linux/virtio_blk.h | 105 ++++++++
>  2 files changed, 524 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index 3efe3da5f8c2..03b5302fac6e 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -15,6 +15,7 @@
>  #include <linux/blk-mq.h>
>  #include <linux/blk-mq-virtio.h>
>  #include <linux/numa.h>
> +#include <linux/vmalloc.h>
>  #include <uapi/linux/virtio_ring.h>
>  
>  #define PART_BITS 4
> @@ -80,22 +81,51 @@ struct virtio_blk {
>       int num_vqs;
>       int io_queues[HCTX_MAX_TYPES];
>       struct virtio_blk_vq *vqs;
> +
> +     /* For zoned device */
> +     unsigned int zone_sectors;
>  };
>  
>  struct virtblk_req {
> +     /* Out header */
>       struct virtio_blk_outhdr out_hdr;
> -     u8 status;
> +
> +     /* In header */
> +     union {
> +             struct {
> +                     u8 status;
> +             } common;
> +
> +             /*
> +              * The zone append command has an extended in header.
> +              * The status field in zone_append_in_hdr must always
> +              * be the last byte.
> +              */
> +             struct {
> +                     __virtio64 sector;
> +                     u8 status;
> +             } zone_append;
> +     } in_hdr;
> +
> +     size_t in_hdr_len;
> +
>       struct sg_table sg_table;
>       struct scatterlist sg[];
>  };
>  
> -static inline blk_status_t virtblk_result(struct virtblk_req *vbr)
> +static inline blk_status_t virtblk_result(u8 status)
>  {
> -     switch (vbr->status) {
> +     switch (status) {
>       case VIRTIO_BLK_S_OK:
>               return BLK_STS_OK;
>       case VIRTIO_BLK_S_UNSUPP:
>               return BLK_STS_NOTSUPP;
> +     case VIRTIO_BLK_S_ZONE_OPEN_RESOURCE:
> +             return BLK_STS_ZONE_OPEN_RESOURCE;
> +     case VIRTIO_BLK_S_ZONE_ACTIVE_RESOURCE:
> +             return BLK_STS_ZONE_ACTIVE_RESOURCE;
> +     case VIRTIO_BLK_S_IOERR:
> +     case VIRTIO_BLK_S_ZONE_UNALIGNED_WP:
>       default:
>               return BLK_STS_IOERR;
>       }
> @@ -111,11 +141,11 @@ static inline struct virtio_blk_vq *get_virtio_blk_vq(struct blk_mq_hw_ctx *hctx
>  
>  static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr)
>  {
> -     struct scatterlist hdr, status, *sgs[3];
> +     struct scatterlist out_hdr, in_hdr, *sgs[3];
>       unsigned int num_out = 0, num_in = 0;
>  
> -     sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr));
> -     sgs[num_out++] = &hdr;
> +     sg_init_one(&out_hdr, &vbr->out_hdr, sizeof(vbr->out_hdr));
> +     sgs[num_out++] = &out_hdr;
>  
>       if (vbr->sg_table.nents) {
>               if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT))
> @@ -124,8 +154,8 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr)
>                       sgs[num_out + num_in++] = vbr->sg_table.sgl;
>       }
>  
> -     sg_init_one(&status, &vbr->status, sizeof(vbr->status));
> -     sgs[num_out + num_in++] = &status;
> +     sg_init_one(&in_hdr, &vbr->in_hdr.common.status, vbr->in_hdr_len);
> +     sgs[num_out + num_in++] = &in_hdr;
>  
>       return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
>  }
> @@ -214,21 +244,22 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev,
>                                     struct request *req,
>                                     struct virtblk_req *vbr)
>  {
> +     size_t in_hdr_len = sizeof(vbr->in_hdr.common.status);
>       bool unmap = false;
>       u32 type;
> +     u64 sector = 0;
>  
> -     vbr->out_hdr.sector = 0;
> +     /* Set fields for all request types */
> +     vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req));
>  
>       switch (req_op(req)) {
>       case REQ_OP_READ:
>               type = VIRTIO_BLK_T_IN;
> -             vbr->out_hdr.sector = cpu_to_virtio64(vdev,
> -                                                   blk_rq_pos(req));
> +             sector = blk_rq_pos(req);
>               break;
>       case REQ_OP_WRITE:
>               type = VIRTIO_BLK_T_OUT;
> -             vbr->out_hdr.sector = cpu_to_virtio64(vdev,
> -                                                   blk_rq_pos(req));
> +             sector = blk_rq_pos(req);
>               break;
>       case REQ_OP_FLUSH:
>               type = VIRTIO_BLK_T_FLUSH;
> @@ -243,16 +274,42 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev,
>       case REQ_OP_SECURE_ERASE:
>               type = VIRTIO_BLK_T_SECURE_ERASE;
>               break;
> -     case REQ_OP_DRV_IN:
> -             type = VIRTIO_BLK_T_GET_ID;
> +     case REQ_OP_ZONE_OPEN:
> +             type = VIRTIO_BLK_T_ZONE_OPEN;
> +             sector = blk_rq_pos(req);
>               break;
> +     case REQ_OP_ZONE_CLOSE:
> +             type = VIRTIO_BLK_T_ZONE_CLOSE;
> +             sector = blk_rq_pos(req);
> +             break;
> +     case REQ_OP_ZONE_FINISH:
> +             type = VIRTIO_BLK_T_ZONE_FINISH;
> +             sector = blk_rq_pos(req);
> +             break;
> +     case REQ_OP_ZONE_APPEND:
> +             type = VIRTIO_BLK_T_ZONE_APPEND;
> +             sector = blk_rq_pos(req);
> +             in_hdr_len = sizeof(vbr->in_hdr.zone_append);
> +             break;
> +     case REQ_OP_ZONE_RESET:
> +             type = VIRTIO_BLK_T_ZONE_RESET;
> +             sector = blk_rq_pos(req);
> +             break;
> +     case REQ_OP_ZONE_RESET_ALL:
> +             type = VIRTIO_BLK_T_ZONE_RESET_ALL;
> +             break;
> +     case REQ_OP_DRV_IN:
> +             /* Out header already filled in, nothing to do */
> +             return 0;
>       default:
>               WARN_ON_ONCE(1);
>               return BLK_STS_IOERR;
>       }
>  
> +     /* Set fields for non-REQ_OP_DRV_IN request types */
> +     vbr->in_hdr_len = in_hdr_len;
>       vbr->out_hdr.type = cpu_to_virtio32(vdev, type);
> -     vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req));
> +     vbr->out_hdr.sector = cpu_to_virtio64(vdev, sector);
>  
>       if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES ||
>           type == VIRTIO_BLK_T_SECURE_ERASE) {
> @@ -263,13 +320,30 @@ static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev,
>       return 0;
>  }
>  
> +/*
> + * The status byte is always the last byte of the virtblk request
> + * in-header. This helper fetches its value for all in-header formats
> + * that are currently defined.
> + */
> +static inline u8 virtblk_vbr_status(struct virtblk_req *vbr)
> +{
> +     return *((u8 *)&vbr->in_hdr + vbr->in_hdr_len - 1);
> +}
> +
>  static inline void virtblk_request_done(struct request *req)
>  {
>       struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
> +     blk_status_t status = virtblk_result(virtblk_vbr_status(vbr));
> +     struct virtio_blk *vblk = req->mq_hctx->queue->queuedata;
>  
>       virtblk_unmap_data(req, vbr);
>       virtblk_cleanup_cmd(req);
> -     blk_mq_end_request(req, virtblk_result(vbr));
> +
> +     if (req_op(req) == REQ_OP_ZONE_APPEND)
> +             req->__sector = virtio64_to_cpu(vblk->vdev,
> +                                             vbr->in_hdr.zone_append.sector);
> +
> +     blk_mq_end_request(req, status);
>  }
>  
>  static void virtblk_done(struct virtqueue *vq)
> @@ -455,6 +529,315 @@ static void virtio_queue_rqs(struct request **rqlist)
>       *rqlist = requeue_list;
>  }
>  
> +#ifdef CONFIG_BLK_DEV_ZONED
> +static void *virtblk_alloc_report_buffer(struct virtio_blk *vblk,
> +                                       unsigned int nr_zones,
> +                                       unsigned int zone_sectors,
> +                                       size_t *buflen)
> +{
> +     struct request_queue *q = vblk->disk->queue;
> +     size_t bufsize;
> +     void *buf;
> +
> +     nr_zones = min_t(unsigned int, nr_zones,
> +                      get_capacity(vblk->disk) >> ilog2(zone_sectors));
> +
> +     bufsize = sizeof(struct virtio_blk_zone_report) +
> +             nr_zones * sizeof(struct virtio_blk_zone_descriptor);
> +     bufsize = min_t(size_t, bufsize,
> +                     queue_max_hw_sectors(q) << SECTOR_SHIFT);
> +     bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);
> +
> +     while (bufsize >= sizeof(struct virtio_blk_zone_report)) {
> +             buf = __vmalloc(bufsize, GFP_KERNEL | __GFP_NORETRY);
> +             if (buf) {
> +                     *buflen = bufsize;
> +                     return buf;
> +             }
> +             bufsize >>= 1;
> +     }
> +
> +     return NULL;
> +}
> +
> +static int virtblk_submit_zone_report(struct virtio_blk *vblk,
> +                                    char *report_buf, size_t report_len,
> +                                    sector_t sector)
> +{
> +     struct request_queue *q = vblk->disk->queue;
> +     struct request *req;
> +     struct virtblk_req *vbr;
> +     int err;
> +
> +     req = blk_mq_alloc_request(q, REQ_OP_DRV_IN, 0);
> +     if (IS_ERR(req))
> +             return PTR_ERR(req);
> +
> +     vbr = blk_mq_rq_to_pdu(req);
> +     vbr->in_hdr_len = sizeof(vbr->in_hdr.common.status);
> +     vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_ZONE_REPORT);
> +     vbr->out_hdr.sector = cpu_to_virtio64(vblk->vdev, sector);
> +
> +     err = blk_rq_map_kern(q, req, report_buf, report_len, GFP_KERNEL);
> +     if (err)
> +             goto out;
> +
> +     blk_execute_rq(req, false);
> +     err = blk_status_to_errno(virtblk_result(vbr->in_hdr.common.status));
> +out:
> +     blk_mq_free_request(req);
> +     return err;
> +}
> +
> +static int virtblk_parse_zone(struct virtio_blk *vblk,
> +                            struct virtio_blk_zone_descriptor *entry,
> +                            unsigned int idx, unsigned int zone_sectors,
> +                            report_zones_cb cb, void *data)
> +{
> +     struct blk_zone zone = { };
> +
> +     zone.start = virtio64_to_cpu(vblk->vdev, entry->z_start);
> +     zone.capacity = virtio64_to_cpu(vblk->vdev, entry->z_cap);
> +
> +     switch (entry->z_type) {
> +     case VIRTIO_BLK_ZT_SWR:
> +             zone.type = BLK_ZONE_TYPE_SEQWRITE_REQ;
> +             break;
> +     case VIRTIO_BLK_ZT_SWP:
> +             zone.type = BLK_ZONE_TYPE_SEQWRITE_PREF;
> +             break;
> +     case VIRTIO_BLK_ZT_CONV:
> +             zone.type = BLK_ZONE_TYPE_CONVENTIONAL;
> +             break;
> +     default:
> +             dev_err(&vblk->vdev->dev, "zone %llu: invalid type %#x\n",
> +                     zone.start, entry->z_type);
> +             return -EINVAL;
> +     }
> +
> +     switch (entry->z_state) {
> +     case VIRTIO_BLK_ZS_EMPTY:
> +             zone.cond = BLK_ZONE_COND_EMPTY;
> +             break;
> +     case VIRTIO_BLK_ZS_CLOSED:
> +             zone.cond = BLK_ZONE_COND_CLOSED;
> +             break;
> +     case VIRTIO_BLK_ZS_FULL:
> +             zone.cond = BLK_ZONE_COND_FULL;
> +             break;
> +     case VIRTIO_BLK_ZS_EOPEN:
> +             zone.cond = BLK_ZONE_COND_EXP_OPEN;
> +             break;
> +     case VIRTIO_BLK_ZS_IOPEN:
> +             zone.cond = BLK_ZONE_COND_IMP_OPEN;
> +             break;
> +     case VIRTIO_BLK_ZS_NOT_WP:
> +             zone.cond = BLK_ZONE_COND_NOT_WP;
> +             break;
> +     case VIRTIO_BLK_ZS_RDONLY:
> +             zone.cond = BLK_ZONE_COND_READONLY;
> +             break;
> +     case VIRTIO_BLK_ZS_OFFLINE:
> +             zone.cond = BLK_ZONE_COND_OFFLINE;
> +             break;
> +     default:
> +             dev_err(&vblk->vdev->dev, "zone %llu: invalid condition %#x\n",
> +                     zone.start, entry->z_state);
> +             return -EINVAL;
> +     }
> +
> +     zone.len = zone_sectors;
> +     if (zone.cond == BLK_ZONE_COND_FULL)
> +             zone.wp = zone.start + zone.len;

Is this correct on devices where the last zone is smaller than
zone_sectors? Maybe it doesn't matter?
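
If the last zone can be smaller than zone_sectors, something like this
(untested) would clamp its length:

  zone.len = min_t(u64, zone_sectors,
                   get_capacity(vblk->disk) - zone.start);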

> +     else
> +             zone.wp = virtio64_to_cpu(vblk->vdev, entry->z_wp);

Is a sanity check needed here? The device is not trusted and must not be
able to trigger out-of-bounds accesses based on zone.wp or similar.
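
Something like this (untested) would reject a write pointer outside the
zone:

  if (zone.wp < zone.start || zone.wp > zone.start + zone.len)
      return -EINVAL;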

> +
> +     return cb(&zone, idx, data);
> +}
> +
> +static int virtblk_report_zones(struct gendisk *disk, sector_t sector,
> +                              unsigned int nr_zones, report_zones_cb cb,
> +                              void *data)
> +{
> +     struct virtio_blk *vblk = disk->private_data;
> +     struct virtio_blk_zone_report *report;
> +     unsigned long long nz, i;
> +     size_t buflen;
> +     unsigned int zone_sectors = vblk->zone_sectors;
> +     int ret, zone_idx = 0;
> +
> +     if (WARN_ON_ONCE(!vblk->zone_sectors))
> +             return -EOPNOTSUPP;
> +
> +     report = virtblk_alloc_report_buffer(vblk, nr_zones,
> +                                          zone_sectors, &buflen);
> +     if (!report)
> +             return -ENOMEM;
> +
> +     while (zone_idx < nr_zones && sector < get_capacity(vblk->disk)) {

Why is zone_idx a signed int while nr_zones is an unsigned int?
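
Making it unsigned would keep the comparison types consistent:

  unsigned int zone_idx = 0;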

> +             memset(report, 0, buflen);
> +
> +             ret = virtblk_submit_zone_report(vblk, (char *)report,
> +                                              buflen, sector);
> +             if (ret) {
> +                     if (ret > 0)
> +                             ret = -EIO;
> +                     goto out_free;
> +             }
> +             nz = min(virtio64_to_cpu(vblk->vdev, report->nr_zones),
> +                      (u64)nr_zones);
> +             if (!nz)
> +                     break;
> +
> +             for (i = 0; i < nz && zone_idx < nr_zones; i++) {
> +                     ret = virtblk_parse_zone(vblk, &report->zones[i],
> +                                              zone_idx, zone_sectors, cb, data);
> +                     if (ret)
> +                             goto out_free;
> +
> +                     sector = virtio64_to_cpu(vblk->vdev,
> +                                              report->zones[i].z_start) +
> +                              zone_sectors;
> +                     zone_idx++;
> +             }
> +     }
> +
> +     if (zone_idx > 0)
> +             ret = zone_idx;
> +     else
> +             ret = -EINVAL;
> +out_free:
> +     kvfree(report);
> +     return ret;
> +}
> +
> +static void virtblk_revalidate_zones(struct virtio_blk *vblk)
> +{
> +     u8 model;
> +
> +     if (!vblk->zone_sectors)
> +             return;
> +
> +     virtio_cread(vblk->vdev, struct virtio_blk_config,
> +                  zoned.model, &model);

model is unused? If used, then it needs to be validated like in
virtblk_probe_zoned_device().
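
For example (untested), the reread model could be checked before
revalidating:

  if (model != VIRTIO_BLK_Z_HM) {
      dev_warn(&vblk->vdev->dev,
               "zone model changed, invalidating device\n");
      set_capacity_and_notify(vblk->disk, 0);
      return;
  }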

> +     if (!blk_revalidate_disk_zones(vblk->disk, NULL))
> +             set_capacity_and_notify(vblk->disk, 0);
> +}
> +
> +static int virtblk_probe_zoned_device(struct virtio_device *vdev,
> +                                    struct virtio_blk *vblk,
> +                                    struct request_queue *q)
> +{
> +     u32 v, wg;
> +     u8 model;
> +     int ret;
> +
> +     virtio_cread(vdev, struct virtio_blk_config,
> +                  zoned.model, &model);
> +
> +     switch (model) {
> +     case VIRTIO_BLK_Z_NONE:
> +             return 0;
> +     case VIRTIO_BLK_Z_HM:
> +             break;
> +     case VIRTIO_BLK_Z_HA:
> +             /*
> +              * Present the host-aware device as a regular drive.
> +              * TODO It is possible to add an option to make it appear
> +              * in the system as a zoned drive.
> +              */
> +             return 0;
> +     default:
> +             dev_err(&vdev->dev, "unsupported zone model %d\n", model);
> +             return -EINVAL;
> +     }
> +
> +     dev_dbg(&vdev->dev, "probing host-managed zoned device\n");
> +
> +     disk_set_zoned(vblk->disk, BLK_ZONED_HM);
> +     blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
> +
> +     virtio_cread(vdev, struct virtio_blk_config,
> +                  zoned.max_open_zones, &v);
> +     disk_set_max_open_zones(vblk->disk, v);
> +
> +     dev_dbg(&vdev->dev, "max open zones = %u\n", v);
> +
> +     virtio_cread(vdev, struct virtio_blk_config,
> +                  zoned.max_active_zones, &v);
> +     disk_set_max_active_zones(vblk->disk, v);
> +     dev_dbg(&vdev->dev, "max active zones = %u\n", v);
> +
> +     virtio_cread(vdev, struct virtio_blk_config,
> +                  zoned.write_granularity, &wg);
> +     if (!wg) {
> +             dev_warn(&vdev->dev, "zero write granularity reported\n");
> +             return -ENODEV;
> +     }
> +     blk_queue_physical_block_size(q, wg);
> +     blk_queue_io_min(q, wg);
> +
> +     dev_dbg(&vdev->dev, "write granularity = %u\n", wg);
> +
> +     /*
> +      * virtio ZBD specification doesn't require zones to be a power of
> +      * two sectors in size, but the code in this driver expects that.
> +      */
> +     virtio_cread(vdev, struct virtio_blk_config, zoned.zone_sectors,
> +                  &vblk->zone_sectors);
> +     if (vblk->zone_sectors == 0 || !is_power_of_2(vblk->zone_sectors)) {
> +             dev_err(&vdev->dev,
> +                     "zoned device with non power of two zone size %u\n",
> +                     vblk->zone_sectors);
> +             return -ENODEV;
> +     }
> +     dev_dbg(&vdev->dev, "zone sectors = %u\n", vblk->zone_sectors);
> +
> +     if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
> +             dev_warn(&vblk->vdev->dev,
> +                      "ignoring negotiated F_DISCARD for zoned device\n");
> +             blk_queue_max_discard_sectors(q, 0);
> +     }
> +
> +     ret = blk_revalidate_disk_zones(vblk->disk, NULL);
> +     if (!ret) {
> +             virtio_cread(vdev, struct virtio_blk_config,
> +                          zoned.max_append_sectors, &v);
> +             if (!v) {
> +                     dev_warn(&vdev->dev, "zero max_append_sectors reported\n");
> +                     return -ENODEV;
> +             }
> +             if ((v << SECTOR_SHIFT) < wg) {
> +                     dev_err(&vdev->dev,
> +                             "write granularity %u exceeds max_append_sectors %u limit\n",
> +                             wg, v);
> +                     return -ENODEV;
> +             }
> +
> +             blk_queue_max_zone_append_sectors(q, v);
> +             dev_dbg(&vdev->dev, "max append sectors = %u\n", v);
> +     }
> +
> +     return ret;
> +}
> +
> +#else
> +
> +/*
> + * Zoned block device support is not configured in this kernel.
> + * We only need to define a few symbols to avoid compilation errors.
> + */
> +#define virtblk_report_zones       NULL
> +static inline void virtblk_revalidate_zones(struct virtio_blk *vblk)
> +{
> +}
> +static inline int virtblk_probe_zoned_device(struct virtio_device *vdev,
> +                     struct virtio_blk *vblk, struct request_queue *q)
> +{
> +     return -EOPNOTSUPP;
> +}
> +#endif /* CONFIG_BLK_DEV_ZONED */
> +
>  /* return id (s/n) string for *disk to *id_str
>   */
>  static int virtblk_get_id(struct gendisk *disk, char *id_str)
> @@ -462,18 +845,24 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
>       struct virtio_blk *vblk = disk->private_data;
>       struct request_queue *q = vblk->disk->queue;
>       struct request *req;
> +     struct virtblk_req *vbr;
>       int err;
>  
>       req = blk_mq_alloc_request(q, REQ_OP_DRV_IN, 0);
>       if (IS_ERR(req))
>               return PTR_ERR(req);
>  
> +     vbr = blk_mq_rq_to_pdu(req);
> +     vbr->in_hdr_len = sizeof(vbr->in_hdr.common.status);
> +     vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_GET_ID);
> +     vbr->out_hdr.sector = 0;
> +
>       err = blk_rq_map_kern(q, req, id_str, VIRTIO_BLK_ID_BYTES, GFP_KERNEL);
>       if (err)
>               goto out;
>  
>       blk_execute_rq(req, false);
> -     err = blk_status_to_errno(virtblk_result(blk_mq_rq_to_pdu(req)));
> +     err = blk_status_to_errno(virtblk_result(vbr->in_hdr.common.status));
>  out:
>       blk_mq_free_request(req);
>       return err;
> @@ -524,6 +913,7 @@ static const struct block_device_operations virtblk_fops = {
>       .owner          = THIS_MODULE,
>       .getgeo         = virtblk_getgeo,
>       .free_disk      = virtblk_free_disk,
> +     .report_zones   = virtblk_report_zones,

Does virtblk_report_zones() need:

  mutex_lock(&vblk->vdev_mutex);

  if (!vblk->vdev) {
      ret = -ENXIO;
      goto out;
  }

  ...

out:
  mutex_unlock(&vblk->vdev_mutex);
  return ret;

?

This is necessary when vblk->vdev is accessed by code that is reachable
after virtblk_remove() has been called (e.g. PCI hot unplug). In this
case the block device still exists and userspace processes may have file
descriptors open, but the underlying virtio-blk device is gone.

I think this may be necessary since ioctl(BLKREPORTZONE) can still be
called after virtblk_remove()?

See the vdev_mutex field doc comment for more information.

Stefan
