add a ->free_disk block_device_operation v3

2022-02-15 Thread Christoph Hellwig
Hi Jens,

this series adds a ->free_disk method to struct block_device_operations so that
drivers can defer freeing their disk private data until the gendisk goes away
and don't need to play games with the validity of ->private_data.

This also converts three simple drivers over as examples, but eventually I
imagine that all drivers with private data will use it.
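
For a driver that adopts it, the pattern ends up looking roughly like the
sketch below (a hypothetical driver "foo"; the blk_alloc_disk()-based setup
and all names are illustrative only, not taken from the converted drivers):

#include <linux/blkdev.h>
#include <linux/genhd.h>
#include <linux/module.h>
#include <linux/slab.h>

struct foo_dev {
	struct gendisk *disk;
	/* ... driver state ... */
};

static void foo_free_disk(struct gendisk *disk)
{
	/* runs once, from disk_release(), after the final put_disk() */
	kfree(disk->private_data);
}

static const struct block_device_operations foo_bdops = {
	.owner		= THIS_MODULE,
	.free_disk	= foo_free_disk,
};

static int foo_probe(struct device *dev)
{
	struct foo_dev *foo;
	int ret;

	foo = kzalloc(sizeof(*foo), GFP_KERNEL);
	if (!foo)
		return -ENOMEM;

	foo->disk = blk_alloc_disk(NUMA_NO_NODE);
	if (!foo->disk) {
		ret = -ENOMEM;
		goto out_free;
	}
	foo->disk->fops = &foo_bdops;
	foo->disk->private_data = foo;

	ret = add_disk(foo->disk);
	if (ret)
		goto out_cleanup_disk;
	return 0;

out_cleanup_disk:
	/* add_disk() failed, so ->free_disk will never be called */
	blk_cleanup_disk(foo->disk);
out_free:
	kfree(foo);
	return ret;
}

Teardown is then just del_gendisk() plus the final put_disk(); the private
data is freed from ->free_disk whenever the last opener goes away.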

Changes since v2:
 - only call ->free_disk after add_disk has returned to simplify probe error
   handling
Changes since v1:
 - fix a tag_set use after free in virtio_blk

Diffstat:
 block/genhd.c   |5 ++
 drivers/block/virtio_blk.c  |   66 +++-
 drivers/memstick/core/ms_block.c|   64 --
 drivers/memstick/core/ms_block.h|1 
 drivers/memstick/core/mspro_block.c |   57 +--
 include/linux/blkdev.h  |2 +
 6 files changed, 46 insertions(+), 149 deletions(-)


[PATCH 3/5] memstick/mspro_block: fix handling of read-only devices

2022-02-15 Thread Christoph Hellwig
Use set_disk_ro to propagate the read-only state to the block layer
instead of checking for it in ->open, which leaked a usage-count
reference whenever it rejected a writable open of a read-only device.

Signed-off-by: Christoph Hellwig 
---
 drivers/memstick/core/mspro_block.c | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/memstick/core/mspro_block.c 
b/drivers/memstick/core/mspro_block.c
index c0450397b6735..7ea312f0840e0 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -186,13 +186,8 @@ static int mspro_block_bd_open(struct block_device *bdev, 
fmode_t mode)
 
mutex_lock(&mspro_block_disk_lock);
 
-   if (msb && msb->card) {
+   if (msb && msb->card)
msb->usage_count++;
-   if ((mode & FMODE_WRITE) && msb->read_only)
-   rc = -EROFS;
-   else
-   rc = 0;
-   }
 
mutex_unlock(&mspro_block_disk_lock);
 
@@ -1239,6 +1234,9 @@ static int mspro_block_init_disk(struct memstick_dev 
*card)
set_capacity(msb->disk, capacity);
dev_dbg(&card->dev, "capacity set %ld\n", capacity);
 
+   if (msb->read_only)
+   set_disk_ro(msb->disk, true);
+
rc = device_add_disk(&card->dev, msb->disk, NULL);
if (rc)
goto out_cleanup_disk;
-- 
2.30.2



[PATCH 2/5] memstick/ms_block: simplify refcounting

2022-02-15 Thread Christoph Hellwig
Implement the ->free_disk method to free the msb_data structure only once
the last gendisk reference goes away instead of keeping a local refcount.

Signed-off-by: Christoph Hellwig 
---
 drivers/memstick/core/ms_block.c | 64 
 drivers/memstick/core/ms_block.h |  1 -
 2 files changed, 15 insertions(+), 50 deletions(-)

diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c
index 0cda6c6baefc3..3993bdd4b519c 100644
--- a/drivers/memstick/core/ms_block.c
+++ b/drivers/memstick/core/ms_block.c
@@ -1943,22 +1943,6 @@ static void msb_io_work(struct work_struct *work)
 static DEFINE_IDR(msb_disk_idr); /*set of used disk numbers */
 static DEFINE_MUTEX(msb_disk_lock); /* protects against races in open/release 
*/
 
-static int msb_bd_open(struct block_device *bdev, fmode_t mode)
-{
-   struct gendisk *disk = bdev->bd_disk;
-   struct msb_data *msb = disk->private_data;
-
-   dbg_verbose("block device open");
-
-   mutex_lock(&msb_disk_lock);
-
-   if (msb && msb->card)
-   msb->usage_count++;
-
-   mutex_unlock(&msb_disk_lock);
-   return 0;
-}
-
 static void msb_data_clear(struct msb_data *msb)
 {
kfree(msb->boot_page);
@@ -1968,33 +1952,6 @@ static void msb_data_clear(struct msb_data *msb)
msb->card = NULL;
 }
 
-static int msb_disk_release(struct gendisk *disk)
-{
-   struct msb_data *msb = disk->private_data;
-
-   dbg_verbose("block device release");
-   mutex_lock(&msb_disk_lock);
-
-   if (msb) {
-   if (msb->usage_count)
-   msb->usage_count--;
-
-   if (!msb->usage_count) {
-   disk->private_data = NULL;
-   idr_remove(&msb_disk_idr, msb->disk_id);
-   put_disk(disk);
-   kfree(msb);
-   }
-   }
-   mutex_unlock(&msb_disk_lock);
-   return 0;
-}
-
-static void msb_bd_release(struct gendisk *disk, fmode_t mode)
-{
-   msb_disk_release(disk);
-}
-
 static int msb_bd_getgeo(struct block_device *bdev,
 struct hd_geometry *geo)
 {
@@ -2003,6 +1960,17 @@ static int msb_bd_getgeo(struct block_device *bdev,
return 0;
 }
 
+static void msb_bd_free_disk(struct gendisk *disk)
+{
+   struct msb_data *msb = disk->private_data;
+
+   mutex_lock(&msb_disk_lock);
+   idr_remove(&msb_disk_idr, msb->disk_id);
+   mutex_unlock(&msb_disk_lock);
+
+   kfree(msb);
+}
+
 static blk_status_t msb_queue_rq(struct blk_mq_hw_ctx *hctx,
 const struct blk_mq_queue_data *bd)
 {
@@ -2096,10 +2064,9 @@ static void msb_start(struct memstick_dev *card)
 }
 
 static const struct block_device_operations msb_bdops = {
-   .open= msb_bd_open,
-   .release = msb_bd_release,
-   .getgeo  = msb_bd_getgeo,
-   .owner   = THIS_MODULE
+   .owner  = THIS_MODULE,
+   .getgeo = msb_bd_getgeo,
+   .free_disk  = msb_bd_free_disk, 
 };
 
 static const struct blk_mq_ops msb_mq_ops = {
@@ -2147,7 +2114,6 @@ static int msb_init_disk(struct memstick_dev *card)
set_capacity(msb->disk, capacity);
dbg("Set total disk size to %lu sectors", capacity);
 
-   msb->usage_count = 1;
msb->io_queue = alloc_ordered_workqueue("ms_block", WQ_MEM_RECLAIM);
INIT_WORK(&msb->io_work, msb_io_work);
sg_init_table(msb->prealloc_sg, MS_BLOCK_MAX_SEGS+1);
@@ -2229,7 +2195,7 @@ static void msb_remove(struct memstick_dev *card)
msb_data_clear(msb);
mutex_unlock(&msb_disk_lock);
 
-   msb_disk_release(msb->disk);
+   put_disk(msb->disk);
memstick_set_drvdata(card, NULL);
 }
 
diff --git a/drivers/memstick/core/ms_block.h b/drivers/memstick/core/ms_block.h
index 122e1a8a8bd5b..7058f9aefeb92 100644
--- a/drivers/memstick/core/ms_block.h
+++ b/drivers/memstick/core/ms_block.h
@@ -143,7 +143,6 @@ struct ms_boot_page {
 } __packed;
 
 struct msb_data {
-   unsigned intusage_count;
struct memstick_dev *card;
struct gendisk  *disk;
struct request_queue*queue;
-- 
2.30.2



[PATCH 1/5] block: add a ->free_disk method

2022-02-15 Thread Christoph Hellwig
Add a method to notify the driver that the gendisk is about to be freed.
This allows drivers to tie the lifetime of their private data to that of
the gendisk and thus deal with device removal races without expensive
synchronization and boilerplate code.

A new flag is added so that ->free_disk is only called after a successful
call to add_disk, which significantly simplifies the error handling path
during probing.

Signed-off-by: Christoph Hellwig 
---
 block/genhd.c  | 5 +
 include/linux/blkdev.h | 2 ++
 2 files changed, 7 insertions(+)

diff --git a/block/genhd.c b/block/genhd.c
index 9589d1d59afab..e351fac41bf25 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -526,6 +526,7 @@ int __must_check device_add_disk(struct device *parent, 
struct gendisk *disk,
 
disk_update_readahead(disk);
disk_add_events(disk);
+   set_bit(GD_ADDED, &disk->state);
return 0;
 
 out_unregister_bdi:
@@ -1119,6 +1120,10 @@ static void disk_release(struct device *dev)
xa_destroy(&disk->part_tbl);
disk->queue->disk = NULL;
blk_put_queue(disk->queue);
+
+   if (test_bit(GD_ADDED, &disk->state) && disk->fops->free_disk)
+   disk->fops->free_disk(disk);
+
iput(disk->part0->bd_inode);/* frees the disk */
 }
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 3bfc75a2a4509..f757f9c2871f8 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -146,6 +146,7 @@ struct gendisk {
 #define GD_READ_ONLY   1
 #define GD_DEAD2
 #define GD_NATIVE_CAPACITY 3
+#define GD_ADDED   4
 
struct mutex open_mutex;/* open/close mutex */
unsigned open_partitions;   /* number of open partitions */
@@ -1464,6 +1465,7 @@ struct block_device_operations {
void (*unlock_native_capacity) (struct gendisk *);
int (*getgeo)(struct block_device *, struct hd_geometry *);
int (*set_read_only)(struct block_device *bdev, bool ro);
+   void (*free_disk)(struct gendisk *disk);
/* this callback is with swap_lock and sometimes page table lock held */
void (*swap_slot_free_notify) (struct block_device *, unsigned long);
int (*report_zones)(struct gendisk *, sector_t sector,
-- 
2.30.2



[PATCH 4/5] memstick/mspro_block: simplify refcounting

2022-02-15 Thread Christoph Hellwig
Implement the ->free_disk method to free the msb_data structure only once
the last gendisk reference goes away instead of keeping a local
refcount.

Signed-off-by: Christoph Hellwig 
---
 drivers/memstick/core/mspro_block.c | 49 +
 1 file changed, 7 insertions(+), 42 deletions(-)

diff --git a/drivers/memstick/core/mspro_block.c 
b/drivers/memstick/core/mspro_block.c
index 7ea312f0840e0..725ba74ded308 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -133,7 +133,6 @@ struct mspro_devinfo {
 
 struct mspro_block_data {
struct memstick_dev   *card;
-   unsigned int  usage_count;
unsigned int  caps;
struct gendisk*disk;
struct request_queue  *queue;
@@ -178,48 +177,16 @@ static int mspro_block_complete_req(struct memstick_dev 
*card, int error);
 
 /*** Block device ***/
 
-static int mspro_block_bd_open(struct block_device *bdev, fmode_t mode)
-{
-   struct gendisk *disk = bdev->bd_disk;
-   struct mspro_block_data *msb = disk->private_data;
-   int rc = -ENXIO;
-
-   mutex_lock(&mspro_block_disk_lock);
-
-   if (msb && msb->card)
-   msb->usage_count++;
-
-   mutex_unlock(&mspro_block_disk_lock);
-
-   return rc;
-}
-
-
-static void mspro_block_disk_release(struct gendisk *disk)
+static void mspro_block_bd_free_disk(struct gendisk *disk)
 {
struct mspro_block_data *msb = disk->private_data;
int disk_id = MINOR(disk_devt(disk)) >> MSPRO_BLOCK_PART_SHIFT;
 
mutex_lock(&mspro_block_disk_lock);
-
-   if (msb) {
-   if (msb->usage_count)
-   msb->usage_count--;
-
-   if (!msb->usage_count) {
-   kfree(msb);
-   disk->private_data = NULL;
-   idr_remove(&mspro_block_disk_idr, disk_id);
-   put_disk(disk);
-   }
-   }
-
+   idr_remove(&mspro_block_disk_idr, disk_id);
mutex_unlock(&mspro_block_disk_lock);
-}
 
-static void mspro_block_bd_release(struct gendisk *disk, fmode_t mode)
-{
-   mspro_block_disk_release(disk);
+   kfree(msb);
 }
 
 static int mspro_block_bd_getgeo(struct block_device *bdev,
@@ -235,10 +202,9 @@ static int mspro_block_bd_getgeo(struct block_device *bdev,
 }
 
 static const struct block_device_operations ms_block_bdops = {
-   .open= mspro_block_bd_open,
-   .release = mspro_block_bd_release,
-   .getgeo  = mspro_block_bd_getgeo,
-   .owner   = THIS_MODULE
+   .owner  = THIS_MODULE,
+   .getgeo = mspro_block_bd_getgeo,
+   .free_disk  = mspro_block_bd_free_disk,
 };
 
 /*** Information ***/
@@ -1221,7 +1187,6 @@ static int mspro_block_init_disk(struct memstick_dev 
*card)
msb->disk->first_minor = disk_id << MSPRO_BLOCK_PART_SHIFT;
msb->disk->minors = 1 << MSPRO_BLOCK_PART_SHIFT;
msb->disk->fops = &ms_block_bdops;
-   msb->usage_count = 1;
msb->disk->private_data = msb;
 
sprintf(msb->disk->disk_name, "mspblk%d", disk_id);
@@ -1339,7 +1304,7 @@ static void mspro_block_remove(struct memstick_dev *card)
mspro_block_data_clear(msb);
mutex_unlock(&mspro_block_disk_lock);
 
-   mspro_block_disk_release(msb->disk);
+   put_disk(msb->disk);
memstick_set_drvdata(card, NULL);
 }
 
-- 
2.30.2



[PATCH 5/5] virtio_blk: simplify refcounting

2022-02-15 Thread Christoph Hellwig
Implement the ->free_disk method to free the virtio_blk structure only
once the last gendisk reference goes away instead of keeping a local
refcount.

Signed-off-by: Christoph Hellwig 
Reviewed-by: Stefan Hajnoczi 
Acked-by: Michael S. Tsirkin 
---
 drivers/block/virtio_blk.c | 66 --
 1 file changed, 14 insertions(+), 52 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index c443cd64fc9b4..5c636ca7f1a7f 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -69,13 +69,6 @@ struct virtio_blk {
/* Process context for config space updates */
struct work_struct config_work;
 
-   /*
-* Tracks references from block_device_operations open/release and
-* virtio_driver probe/remove so this object can be freed once no
-* longer in use.
-*/
-   refcount_t refs;
-
/* What host tells us, plus 2 for header & tailer. */
unsigned int sg_elems;
 
@@ -391,43 +384,6 @@ static int virtblk_get_id(struct gendisk *disk, char 
*id_str)
return err;
 }
 
-static void virtblk_get(struct virtio_blk *vblk)
-{
-   refcount_inc(&vblk->refs);
-}
-
-static void virtblk_put(struct virtio_blk *vblk)
-{
-   if (refcount_dec_and_test(&vblk->refs)) {
-   ida_simple_remove(&vd_index_ida, vblk->index);
-   mutex_destroy(&vblk->vdev_mutex);
-   kfree(vblk);
-   }
-}
-
-static int virtblk_open(struct block_device *bd, fmode_t mode)
-{
-   struct virtio_blk *vblk = bd->bd_disk->private_data;
-   int ret = 0;
-
-   mutex_lock(&vblk->vdev_mutex);
-
-   if (vblk->vdev)
-   virtblk_get(vblk);
-   else
-   ret = -ENXIO;
-
-   mutex_unlock(&vblk->vdev_mutex);
-   return ret;
-}
-
-static void virtblk_release(struct gendisk *disk, fmode_t mode)
-{
-   struct virtio_blk *vblk = disk->private_data;
-
-   virtblk_put(vblk);
-}
-
 /* We provide getgeo only to please some old bootloader/partitioning tools */
 static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
 {
@@ -460,11 +416,19 @@ static int virtblk_getgeo(struct block_device *bd, struct 
hd_geometry *geo)
return ret;
 }
 
+static void virtblk_free_disk(struct gendisk *disk)
+{
+   struct virtio_blk *vblk = disk->private_data;
+
+   ida_simple_remove(&vd_index_ida, vblk->index);
+   mutex_destroy(&vblk->vdev_mutex);
+   kfree(vblk);
+}
+
 static const struct block_device_operations virtblk_fops = {
-   .owner  = THIS_MODULE,
-   .open = virtblk_open,
-   .release = virtblk_release,
-   .getgeo = virtblk_getgeo,
+   .owner  = THIS_MODULE,
+   .getgeo = virtblk_getgeo,
+   .free_disk  = virtblk_free_disk,
 };
 
 static int index_to_minor(int index)
@@ -791,8 +755,6 @@ static int virtblk_probe(struct virtio_device *vdev)
goto out_free_index;
}
 
-   /* This reference is dropped in virtblk_remove(). */
-   refcount_set(&vblk->refs, 1);
mutex_init(&vblk->vdev_mutex);
 
vblk->vdev = vdev;
@@ -970,7 +932,7 @@ static void virtblk_remove(struct virtio_device *vdev)
flush_work(&vblk->config_work);
 
del_gendisk(vblk->disk);
-   blk_cleanup_disk(vblk->disk);
+   blk_cleanup_queue(vblk->disk->queue);
blk_mq_free_tag_set(&vblk->tag_set);
 
mutex_lock(&vblk->vdev_mutex);
@@ -986,7 +948,7 @@ static void virtblk_remove(struct virtio_device *vdev)
 
mutex_unlock(&vblk->vdev_mutex);
 
-   virtblk_put(vblk);
+   put_disk(vblk->disk);
 }
 
 #ifdef CONFIG_PM_SLEEP
-- 
2.30.2



Re: [PATCH] Virtio-balloon: add user space API for sizing

2022-02-15 Thread David Hildenbrand
On 14.02.22 20:59, Kameron Lutes wrote:
> This new linux API will allow user space applications to directly
> control the size of the virtio-balloon. This is useful in
> situations where the guest must quickly respond to drastically
> increased memory pressure and cannot wait for the host to adjust
> the balloon's size.
> 
> Under the current wording of the Virtio spec, guest driven
> behavior such as this is permitted:
> 
> VIRTIO Version 1.1 Section 5.5.6
> "The device is driven either by the receipt of a configuration
> change notification, or by changing guest memory needs, such as
> performing memory compaction or responding to out of memory
> conditions."

Not quite. num_pages is determined by the hypervisor only and the guest
is not expected to change it, and if it does, it's ignored.

5.5.6 does not indicate at all that the guest may change it or that it
would have any effect. num_pages is examined only, actual is updated by
the driver.

5.5.6.1 documents what's allowed, e.g.,

  The driver SHOULD supply pages to the balloon when num_pages is
  greater than the actual number of pages in the balloon.

  The driver MAY use pages from the balloon when num_pages is less than
  the actual number of pages in the balloon.

and special handling for VIRTIO_BALLOON_F_DEFLATE_ON_OOM.

Especially, we have

  The driver MUST update actual after changing the number of pages in
  the balloon.

  The driver MAY update actual once after multiple inflate and deflate
  operations.

That's also why QEMU never syncs back the num_pages value from the guest
when writing the config.
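
For reference, the driver side of that contract is roughly the following
(condensed sketch of the logic in drivers/virtio/virtio_balloon.c, not
verbatim; fill_balloon()/leak_balloon() are the driver's internal helpers):

static void update_balloon_size(struct virtio_balloon *vb)
{
	u32 actual = vb->num_pages;

	/* 'actual' is the only config field the driver ever writes */
	virtio_cwrite_le(vb->vdev, struct virtio_balloon_config, actual,
			 &actual);
}

static void balloon_towards_target(struct virtio_balloon *vb)
{
	u32 num_pages;
	s64 diff;

	/* 'num_pages' is owned by the device and only read here */
	virtio_cread_le(vb->vdev, struct virtio_balloon_config, num_pages,
			&num_pages);
	diff = (s64)num_pages - vb->num_pages;

	if (diff > 0)
		fill_balloon(vb, diff);		/* inflate towards the target */
	else if (diff < 0)
		leak_balloon(vb, -diff);	/* deflate towards the target */

	update_balloon_size(vb);		/* report back via 'actual' */
}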


Current spec does not allow for what you propose.


> 
> The intended use case for this API is one where the host
> communicates a deflation limit to the guest. The guest may then
> choose to respond to memory pressure by deflating its balloon down
> to the guest's allowable limit.

It would be good to have a full proposal and a proper spec update. I'd
assume you'd want separate values for soft vs. hard num_pages -- if
that's what we really want.

BUT

There seems to be recent interest in handling memory pressure in a
better way (although how to really detect "serious memory pressure" vs
"ordinary reclaim" in Linux is still to be figured out). There is
already a discussion going on how that could happen. Adding random user
space toggles might not be the best idea. We might want a single
mechanism to achieve that.

https://lists.oasis-open.org/archives/virtio-comment/202201/msg00139.html

-- 
Thanks,

David / dhildenb



Re: [PATCH V4 4/4] vDPA/ifcvf: implement shared IRQ feature

2022-02-15 Thread Michael S. Tsirkin
On Tue, Feb 15, 2022 at 11:34:31AM +0800, Jason Wang wrote:
> On Mon, Feb 14, 2022 at 10:25 PM Michael S. Tsirkin  wrote:
> >
> > On Mon, Feb 14, 2022 at 03:19:25PM +0800, Jason Wang wrote:
> > >
> > > 在 2022/2/3 下午3:27, Zhu Lingshan 写道:
> > > > On some platforms/devices, there may not be enough MSI vector
> > > > slots allocated for virtqueues and config changes. In such a case,
> > > > the interrupt sources(virtqueues, config changes) must share
> > > > an IRQ/vector, to avoid initialization failures, keep
> > > > the device functional.
> > > >
> > > > This commit handles three cases:
> > > > (1) number of the allocated vectors == the number of virtqueues + 1
> > > > (config changes), every virtqueue and the config interrupt has
> > > > a separated vector/IRQ, the best and the most likely case.
> > > > (2) number of the allocated vectors is less than the best case, but
> > > > greater than 1. In this case, all virtqueues share a vector/IRQ,
> > > > the config interrupt has a separated vector/IRQ
> > > > (3) only one vector is allocated, in this case, the virtqueues and
> > > > the config interrupt share a vector/IRQ. The worst and most
> > > > unlikely case.
> > > >
> > > > Otherwise, it needs to fail.
> > > >
> > > > This commit introduces some helper functions:
> > > > ifcvf_set_vq_vector() and ifcvf_set_config_vector() sets virtqueue
> > > > vector and config vector in the device config space, so that
> > > > the device can send interrupt DMA.
> > > >
> > > > This commit adds some fields in struct ifcvf_hw and re-placed
> > > > the existed fields to be aligned with the cacheline.
> > > >
> > > > Signed-off-by: Zhu Lingshan 
> > > > ---
> > > >   drivers/vdpa/ifcvf/ifcvf_base.c |  47 --
> > > >   drivers/vdpa/ifcvf/ifcvf_base.h |  23 ++-
> > > >   drivers/vdpa/ifcvf/ifcvf_main.c | 243 +++-
> > > >   3 files changed, 256 insertions(+), 57 deletions(-)
> > > >
> > > > diff --git a/drivers/vdpa/ifcvf/ifcvf_base.c 
> > > > b/drivers/vdpa/ifcvf/ifcvf_base.c
> > > > index 397692ae671c..18dcb63ab1e3 100644
> > > > --- a/drivers/vdpa/ifcvf/ifcvf_base.c
> > > > +++ b/drivers/vdpa/ifcvf/ifcvf_base.c
> > > > @@ -15,6 +15,36 @@ struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw 
> > > > *hw)
> > > > return container_of(hw, struct ifcvf_adapter, vf);
> > > >   }
> > > > +int ifcvf_set_vq_vector(struct ifcvf_hw *hw, u16 qid, int vector)
> > > > +{
> > > > +   struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg;
> > > > +   struct ifcvf_adapter *ifcvf = vf_to_adapter(hw);
> > > > +
> > > > +   ifc_iowrite16(qid, &cfg->queue_select);
> > > > +   ifc_iowrite16(vector, &cfg->queue_msix_vector);
> > > > +   if (ifc_ioread16(&cfg->queue_msix_vector) == VIRTIO_MSI_NO_VECTOR) {
> > > > +   IFCVF_ERR(ifcvf->pdev, "No msix vector for queue %u\n", 
> > > > qid);
> > > > +   return -EINVAL;
> > > > +   }
> > >
> > >
> > > Let's leave this check for the caller, E.g can caller try to assign
> > > NO_VECTOR during uni-nit?
> >
> >
> > Hmm I'm not sure I agree. Caller will have to write queue select
> > again, and that's an extra IO, which is a waste.
> > And having function checking itself just seems better contained.
> 
> It's as simple as just return what we read here like virtio_pci did:
> 
> u16 vp_modern_queue_vector(struct virtio_pci_modern_device *mdev,
>u16 index, u16 vector)
> {
> struct virtio_pci_common_cfg __iomem *cfg = mdev->common;
> 
> vp_iowrite16(index, &cfg->queue_select);
> vp_iowrite16(vector, &cfg->queue_msix_vector);
> /* Flush the write out to device */
> return vp_ioread16(&cfg->queue_msix_vector);
> }
> EXPORT_SYMBOL_GPL(vp_modern_queue_vector);


That would be ok.
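
Something along these lines, i.e. (sketch only, not tested; the caller then
decides whether VIRTIO_MSI_NO_VECTOR is an error):

static u16 ifcvf_set_vq_vector(struct ifcvf_hw *hw, u16 qid, u16 vector)
{
	struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg;

	ifc_iowrite16(qid, &cfg->queue_select);
	ifc_iowrite16(vector, &cfg->queue_msix_vector);
	/* flush the write out to the device and report what it accepted */
	return ifc_ioread16(&cfg->queue_msix_vector);
}

static u16 ifcvf_set_config_vector(struct ifcvf_hw *hw, u16 vector)
{
	struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg;

	ifc_iowrite16(vector, &cfg->msix_config);
	return ifc_ioread16(&cfg->msix_config);
}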

> Then the caller can decide to check it with NO_VECTOR if needed.
> Current code will break the callers that want to set NO_VECTOR.
> 
> >
> > >
> > > > +
> > > > +   return 0;
> > > > +}
> > > > +
> > > > +int ifcvf_set_config_vector(struct ifcvf_hw *hw, int vector)
> > > > +{
> > > > +   struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg;
> > > > +   struct ifcvf_adapter *ifcvf = vf_to_adapter(hw);
> > > > +
> > > > +   cfg = hw->common_cfg;
> > > > +   ifc_iowrite16(vector, &cfg->msix_config);
> > > > +   if (ifc_ioread16(&cfg->msix_config) == VIRTIO_MSI_NO_VECTOR) {
> > > > +   IFCVF_ERR(ifcvf->pdev, "No msix vector for device 
> > > > config\n");
> > > > +   return -EINVAL;
> > > > +   }
> > >
> > >
> > > Similar question as above.
> > >
> > >
> > > > +
> > > > +   return 0;
> > > > +}
> > > > +
> > > >   static void __iomem *get_cap_addr(struct ifcvf_hw *hw,
> > > >   struct virtio_pci_cap *cap)
> > > >   {
> > > > @@ -140,6 +170,8 @@ int ifcvf_init_hw(struct ifcvf_hw *hw, struct 
> > > > pci_dev *pdev)
> > > >   hw->common_cfg, hw->notify_base, hw->isr,
> > > >   hw->dev_cfg, hw->notify_off_multiplier);
> > > > +   hw->vqs_shared_irq = -EINVAL;
> > > > +
> > > >

PING: [PATCH v2 0/3] Introduce akcipher service for virtio-crypto

2022-02-15 Thread zhenwei pi

Hi, Lei

Could you please review the V2 version?

On 2/11/22 4:41 PM, zhenwei pi wrote:

v1 -> v2:
- Fix 1 compiling warning reported by kernel test robot 
- Put "__le32 akcipher_algo;" instead of "__le32 reserve;" field of
struct virtio_crypto_config directly without size change.
- Add padding in struct virtio_crypto_ecdsa_session_para to keep
64-bit alignment.
- Remove irrelevant change by code format alignment.

- Also CC crypto gurus Herbert and linux-cry...@vger.kernel.org.

- Test with QEMU(patched by the v2 version), works fine.

v1:
- Introduce akcipher service, implement RSA algorithm, and a minor fix.

zhenwei pi (3):
   virtio_crypto: Introduce VIRTIO_CRYPTO_NOSPC
   virtio-crypto: introduce akcipher service
   virtio-crypto: implement RSA algorithm

  drivers/crypto/virtio/Makefile|   1 +
  .../virtio/virtio_crypto_akcipher_algo.c  | 584 ++
  drivers/crypto/virtio/virtio_crypto_common.h  |   3 +
  drivers/crypto/virtio/virtio_crypto_core.c|   6 +-
  drivers/crypto/virtio/virtio_crypto_mgr.c |  11 +
  include/uapi/linux/virtio_crypto.h|  82 ++-
  6 files changed, 685 insertions(+), 2 deletions(-)
  create mode 100644 drivers/crypto/virtio/virtio_crypto_akcipher_algo.c



--
zhenwei pi


Re: [RFC v3 1/2] virtio-pmem: Async virtio-pmem flush

2022-02-15 Thread Dan Williams
On Tue, Jan 11, 2022 at 8:23 AM Pankaj Gupta
 wrote:
>
> Enable asynchronous flush for virtio pmem using work queue. Also,
> coalesce the flush requests when a flush is already in process.
> This functionality is copied from md/RAID code.
>
> When a flush is already in process, new flush requests wait till
> previous flush completes in another context (work queue). For all
> the requests come between ongoing flush and new flush start time, only
> single flush executes, thus adhers to flush coalscing logic. This is

s/adhers/adheres/

s/coalscing/coalescing/

> important for maintaining the flush request order with request coalscing.

s/coalscing/coalescing/

>
> Signed-off-by: Pankaj Gupta 
> ---
>  drivers/nvdimm/nd_virtio.c   | 74 +++-
>  drivers/nvdimm/virtio_pmem.c | 10 +
>  drivers/nvdimm/virtio_pmem.h | 16 
>  3 files changed, 83 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c
> index 10351d5b49fa..179ea7a73338 100644
> --- a/drivers/nvdimm/nd_virtio.c
> +++ b/drivers/nvdimm/nd_virtio.c
> @@ -100,26 +100,66 @@ static int virtio_pmem_flush(struct nd_region 
> *nd_region)
>  /* The asynchronous flush callback function */
>  int async_pmem_flush(struct nd_region *nd_region, struct bio *bio)
>  {
> -   /*
> -* Create child bio for asynchronous flush and chain with
> -* parent bio. Otherwise directly call nd_region flush.
> +   /* queue asynchronous flush and coalesce the flush requests */
> +   struct virtio_device *vdev = nd_region->provider_data;
> +   struct virtio_pmem *vpmem  = vdev->priv;
> +   ktime_t req_start = ktime_get_boottime();
> +   int ret = -EINPROGRESS;
> +
> +   spin_lock_irq(&vpmem->lock);

Why a new lock and not continue to use ->pmem_lock?

Have you tested this with CONFIG_PROVE_LOCKING?

Along those lines do you have a selftest that can be added to the
kernel as well so that 0day or other bots could offer early warnings
on regressions?

> +   /* flush requests wait until ongoing flush completes,
> +* hence coalescing all the pending requests.
>  */
> -   if (bio && bio->bi_iter.bi_sector != -1) {
> -   struct bio *child = bio_alloc(GFP_ATOMIC, 0);
> -
> -   if (!child)
> -   return -ENOMEM;
> -   bio_copy_dev(child, bio);
> -   child->bi_opf = REQ_PREFLUSH;
> -   child->bi_iter.bi_sector = -1;
> -   bio_chain(child, bio);
> -   submit_bio(child);
> -   return 0;
> +   wait_event_lock_irq(vpmem->sb_wait,
> +   !vpmem->flush_bio ||
> +   ktime_before(req_start, vpmem->prev_flush_start),
> +   vpmem->lock);
> +   /* new request after previous flush is completed */
> +   if (ktime_after(req_start, vpmem->prev_flush_start)) {
> +   WARN_ON(vpmem->flush_bio);
> +   vpmem->flush_bio = bio;
> +   bio = NULL;
> +   }
> +   spin_unlock_irq(&vpmem->lock);
> +
> +   if (!bio)
> +   queue_work(vpmem->pmem_wq, &vpmem->flush_work);
> +   else {
> +   /* flush completed in other context while we waited */
> +   if (bio && (bio->bi_opf & REQ_PREFLUSH))
> +   bio->bi_opf &= ~REQ_PREFLUSH;
> +   else if (bio && (bio->bi_opf & REQ_FUA))
> +   bio->bi_opf &= ~REQ_FUA;
> +
> +   ret = vpmem->prev_flush_err;
> }
> -   if (virtio_pmem_flush(nd_region))
> -   return -EIO;
>
> -   return 0;
> +   return ret;
>  };
>  EXPORT_SYMBOL_GPL(async_pmem_flush);
> +
> +void submit_async_flush(struct work_struct *ws)

This name is too generic to be exported from drivers/nvdimm/nd_virtio.c

...it strikes me that there is little reason for nd_virtio and
virtio_pmem to be separate modules. They are both enabled by the same
Kconfig, so why not combine them into one module and drop the exports?

> +{
> +   struct virtio_pmem *vpmem = container_of(ws, struct virtio_pmem, 
> flush_work);
> +   struct bio *bio = vpmem->flush_bio;
> +
> +   vpmem->start_flush = ktime_get_boottime();
> +   vpmem->prev_flush_err = virtio_pmem_flush(vpmem->nd_region);
> +   vpmem->prev_flush_start = vpmem->start_flush;
> +   vpmem->flush_bio = NULL;
> +   wake_up(&vpmem->sb_wait);
> +
> +   if (vpmem->prev_flush_err)
> +   bio->bi_status = errno_to_blk_status(-EIO);
> +
> +   /* Submit parent bio only for PREFLUSH */
> +   if (bio && (bio->bi_opf & REQ_PREFLUSH)) {
> +   bio->bi_opf &= ~REQ_PREFLUSH;
> +   submit_bio(bio);
> +   } else if (bio && (bio->bi_opf & REQ_FUA)) {
> +   bio->bi_opf &= ~REQ_FUA;
> +   bio_endio(bio);
> +   }
> +}
> +EXPORT_SYMBOL_GPL(submit_async_flush);
>  MODULE_LICENSE("GPL");
> diff --git 

Re: [PATCH v5 06/22] virtio_ring: queue_reset: packed: support enable reset queue

2022-02-15 Thread Jason Wang
On Mon, Feb 14, 2022 at 4:14 PM Xuan Zhuo  wrote:
>
> The purpose of this patch is to make vring packed support re-enable reset
> vq.
>
> Based on whether the incoming vq passed by vring_setup_virtqueue() is
> NULL or not, distinguish whether it is a normal create virtqueue or
> re-enable a reset queue.
>
> When re-enable a reset queue, reuse the original callback, name, indirect.
>
> Signed-off-by: Xuan Zhuo 
> ---
>  drivers/virtio/virtio_ring.c | 29 ++---
>  1 file changed, 18 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 4639e1643c78..20659f7ca582 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -1683,7 +1683,8 @@ static struct virtqueue *vring_create_virtqueue_packed(
> bool context,
> bool (*notify)(struct virtqueue *),
> void (*callback)(struct virtqueue *),
> -   const char *name)
> +   const char *name,
> +   struct virtqueue *_vq)
>  {
> struct vring_virtqueue *vq;
> struct vring_packed_desc *ring;
> @@ -1713,13 +1714,20 @@ static struct virtqueue 
> *vring_create_virtqueue_packed(
> if (!device)
> goto err_device;
>
> -   vq = kmalloc(sizeof(*vq), GFP_KERNEL);
> -   if (!vq)
> -   goto err_vq;
> +   if (_vq) {
> +   vq = to_vvq(_vq);
> +   } else {
> +   vq = kmalloc(sizeof(*vq), GFP_KERNEL);
> +   if (!vq)
> +   goto err_vq;
> +
> +   vq->vq.callback = callback;
> +   vq->vq.name = name;
> +   vq->indirect = virtio_has_feature(vdev, 
> VIRTIO_RING_F_INDIRECT_DESC) &&
> +   !context;
> +   }

The code looks tricky. Except for the ring memory we don't even need to
touch any of the other attributes.

I'd suggest splitting out the vring allocation into a dedicated helper
that could be called by both vring_create_queue_XXX and the enable()
logic (and in the enable logic we don't even need to reallocate if the size
is not changed).
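
Roughly something like the sketch below (struct and helper names are made
up for illustration): the ring buffers are allocated by a standalone
helper, and vring_create_virtqueue_packed() keeps only the one-time
kmalloc/init of struct vring_virtqueue.

struct vring_packed_rings {
	struct vring_packed_desc *ring;
	struct vring_packed_desc_event *driver, *device;
	dma_addr_t ring_dma, driver_dma, device_dma;
	u32 num;
};

static int vring_alloc_rings_packed(struct virtio_device *vdev, u32 num,
				    struct vring_packed_rings *r)
{
	size_t ring_size = num * sizeof(struct vring_packed_desc);
	size_t event_size = sizeof(struct vring_packed_desc_event);

	r->ring = vring_alloc_queue(vdev, ring_size, &r->ring_dma,
				    GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
	if (!r->ring)
		return -ENOMEM;

	r->driver = vring_alloc_queue(vdev, event_size, &r->driver_dma,
				      GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
	if (!r->driver)
		goto err_driver;

	r->device = vring_alloc_queue(vdev, event_size, &r->device_dma,
				      GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
	if (!r->device)
		goto err_device;

	r->num = num;
	return 0;

err_device:
	vring_free_queue(vdev, event_size, r->driver, r->driver_dma);
err_driver:
	vring_free_queue(vdev, ring_size, r->ring, r->ring_dma);
	return -ENOMEM;
}

The enable-after-reset path would then call only this helper (or skip it
entirely when the requested size is unchanged), instead of re-running the
whole create path with a magic _vq argument.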

Thanks

>
> -   vq->vq.callback = callback;
> vq->vq.vdev = vdev;
> -   vq->vq.name = name;
> vq->vq.num_free = num;
> vq->vq.index = index;
> vq->we_own_ring = true;
> @@ -1736,8 +1744,6 @@ static struct virtqueue *vring_create_virtqueue_packed(
> vq->last_add_time_valid = false;
>  #endif
>
> -   vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) 
> &&
> -   !context;
> vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
>
> if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
> @@ -1778,7 +1784,7 @@ static struct virtqueue *vring_create_virtqueue_packed(
> goto err_desc_extra;
>
> /* No callback?  Tell other side not to bother us. */
> -   if (!callback) {
> +   if (!vq->vq.callback) {
> vq->packed.event_flags_shadow = 
> VRING_PACKED_EVENT_FLAG_DISABLE;
> vq->packed.vring.driver->flags =
> cpu_to_le16(vq->packed.event_flags_shadow);
> @@ -1792,7 +1798,8 @@ static struct virtqueue *vring_create_virtqueue_packed(
>  err_desc_extra:
> kfree(vq->packed.desc_state);
>  err_desc_state:
> -   kfree(vq);
> +   if (!_vq)
> +   kfree(vq);
>  err_vq:
> vring_free_queue(vdev, event_size_in_bytes, device, 
> device_event_dma_addr);
>  err_device:
> @@ -2317,7 +2324,7 @@ struct virtqueue *vring_setup_virtqueue(
> if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
> return vring_create_virtqueue_packed(index, num, vring_align,
> vdev, weak_barriers, may_reduce_num,
> -   context, notify, callback, name);
> +   context, notify, callback, name, vq);
>
> return vring_create_virtqueue_split(index, num, vring_align,
> vdev, weak_barriers, may_reduce_num,
> --
> 2.31.0
>



Re: [PATCH v5 14/22] virtio_pci: queue_reset: support VIRTIO_F_RING_RESET

2022-02-15 Thread Jason Wang
On Mon, Feb 14, 2022 at 4:14 PM Xuan Zhuo  wrote:
>
> This patch implements virtio pci support for QUEUE RESET.
>
> Performing reset on a queue is divided into these steps:
>
> 1. reset_vq: reset one vq
> 2. recycle the buffer from vq by virtqueue_detach_unused_buf()
> 3. release the ring of the vq by vring_release_virtqueue()
> 4. enable_reset_vq: re-enable the reset queue
>
> This patch implements reset_vq, enable_reset_vq in the pci scenario.
>
> Signed-off-by: Xuan Zhuo 
> ---
>  drivers/virtio/virtio_pci_common.c |  8 ++--
>  drivers/virtio/virtio_pci_modern.c | 60 ++
>  2 files changed, 65 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/virtio/virtio_pci_common.c 
> b/drivers/virtio/virtio_pci_common.c
> index 5a4f750a0b97..9ea319b1d404 100644
> --- a/drivers/virtio/virtio_pci_common.c
> +++ b/drivers/virtio/virtio_pci_common.c
> @@ -255,9 +255,11 @@ static void vp_del_vq(struct virtqueue *vq)
> struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index];
> unsigned long flags;
>
> -   spin_lock_irqsave(&vp_dev->lock, flags);
> -   list_del(&vq->node);
> -   spin_unlock_irqrestore(&vp_dev->lock, flags);
> +   if (!vq->reset) {
> +   spin_lock_irqsave(&vp_dev->lock, flags);
> +   list_del(&vq->node);
> +   spin_unlock_irqrestore(&vp_dev->lock, flags);
> +   }
>
> vp_dev->del_vq(info);
> kfree(info);
> diff --git a/drivers/virtio/virtio_pci_modern.c 
> b/drivers/virtio/virtio_pci_modern.c
> index bed3e9b84272..7d28f4c36fc2 100644
> --- a/drivers/virtio/virtio_pci_modern.c
> +++ b/drivers/virtio/virtio_pci_modern.c
> @@ -34,6 +34,9 @@ static void vp_transport_features(struct virtio_device 
> *vdev, u64 features)
> if ((features & BIT_ULL(VIRTIO_F_SR_IOV)) &&
> pci_find_ext_capability(pci_dev, 
> PCI_EXT_CAP_ID_SRIOV))
> __virtio_set_bit(vdev, VIRTIO_F_SR_IOV);
> +
> +   if (features & BIT_ULL(VIRTIO_F_RING_RESET))
> +   __virtio_set_bit(vdev, VIRTIO_F_RING_RESET);
>  }
>
>  /* virtio config->finalize_features() implementation */
> @@ -176,6 +179,59 @@ static void vp_reset(struct virtio_device *vdev)
> vp_disable_cbs(vdev);
>  }
>
> +static int vp_modern_reset_vq(struct virtqueue *vq)
> +{
> +   struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
> +   struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
> +   struct virtio_pci_vq_info *info;
> +   unsigned long flags;
> +
> +   if (!virtio_has_feature(vq->vdev, VIRTIO_F_RING_RESET))
> +   return -ENOENT;
> +
> +   vp_modern_set_queue_reset(mdev, vq->index);
> +
> +   info = vp_dev->vqs[vq->index];
> +

Any reason that we don't need to disable irq here as the previous versions did?


> +   /* delete vq from irq handler */
> +   spin_lock_irqsave(&vp_dev->lock, flags);
> +   list_del(&vq->node);
> +   spin_unlock_irqrestore(&vp_dev->lock, flags);
> +
> +   INIT_LIST_HEAD(&vq->node);
> +
> +   vq->reset = VIRTQUEUE_RESET_STAGE_DEVICE;
> +
> +   return 0;
> +}
> +
> +static int vp_modern_enable_reset_vq(struct virtqueue *vq)
> +{
> +   struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
> +   struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
> +   struct virtio_pci_vq_info *info;
> +   struct virtqueue *_vq;
> +
> +   if (vq->reset != VIRTQUEUE_RESET_STAGE_RELEASE)
> +   return -EBUSY;
> +
> +   /* check queue reset status */
> +   if (vp_modern_get_queue_reset(mdev, vq->index) != 1)
> +   return -EBUSY;
> +
> +   info = vp_dev->vqs[vq->index];
> +   _vq = vp_setup_vq(vq->vdev, vq->index, NULL, NULL, NULL,
> +info->msix_vector);

So we only care about modern devices; this means using vp_setup_vq()
with NULL seems tricky.

As replied in another thread, I would simply ask the caller to call
the vring reallocation helper. See the reply for patch 17.

Thanks


> +   if (IS_ERR(_vq)) {
> +   vq->reset = VIRTQUEUE_RESET_STAGE_RELEASE;
> +   return PTR_ERR(_vq);
> +   }
> +
> +   vp_modern_set_queue_enable(&vp_dev->mdev, vq->index, true);
> +
> +   return 0;
> +}
> +
>  static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
>  {
> return vp_modern_config_vector(&vp_dev->mdev, vector);
> @@ -397,6 +453,8 @@ static const struct virtio_config_ops 
> virtio_pci_config_nodev_ops = {
> .set_vq_affinity = vp_set_vq_affinity,
> .get_vq_affinity = vp_get_vq_affinity,
> .get_shm_region  = vp_get_shm_region,
> +   .reset_vq= vp_modern_reset_vq,
> +   .enable_reset_vq = vp_modern_enable_reset_vq,
>  };
>
>  static const struct virtio_config_ops virtio_pci_config_ops = {
> @@ -415,6 +473,8 @@ static const struct virtio_config_ops 
> virtio_pci_config_ops = {
> .set_vq_affinity = vp_set_vq_affinity,
> .get_vq_affinity = vp_get_vq_affinity,
>

Re: [PATCH v5 20/22] virtio_net: set the default max ring num

2022-02-15 Thread Jason Wang
On Mon, Feb 14, 2022 at 4:14 PM Xuan Zhuo  wrote:
>
> Sets the default maximum ring num based on virtio_set_max_ring_num().
>
> The default maximum ring num is 1024.

Having a default value is pretty useful; I see 32K is used by default for IFCVF.

Rethinking this, how about having a different default value based on the speed?

Without SPEED_DUPLEX, we use 1024. Otherwise

10g 4096
40g 8192

etc.

(The numbers are just copied from the 10g/40g default parameters of
other vendors.)
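
i.e. something like (the helper and the exact numbers are illustrative
only, not part of this series):

static u16 virtnet_default_ring_num(u32 speed)
{
	switch (speed) {
	case SPEED_10000:
		return 4096;
	case SPEED_40000:
		return 8192;
	case SPEED_UNKNOWN:
	default:
		return 1024;	/* no SPEED_DUPLEX info, stay conservative */
	}
}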

Thanks

>
> Signed-off-by: Xuan Zhuo 
> ---
>  drivers/net/virtio_net.c | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index a4ffd7cdf623..77e61fe0b2ce 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -35,6 +35,8 @@ module_param(napi_tx, bool, 0644);
>  #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
>  #define GOOD_COPY_LEN  128
>
> +#define VIRTNET_DEFAULT_MAX_RING_NUM 1024
> +
>  #define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
>
>  /* Amount of XDP headroom to prepend to packets for use by xdp_adjust_head */
> @@ -3045,6 +3047,8 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
> ctx[rxq2vq(i)] = true;
> }
>
> +   virtio_set_max_ring_num(vi->vdev, VIRTNET_DEFAULT_MAX_RING_NUM);
> +
> ret = virtio_find_vqs_ctx(vi->vdev, total_vqs, vqs, callbacks,
>   names, ctx, NULL);
> if (ret)
> --
> 2.31.0
>



Re: [PATCH v5 17/22] virtio_net: support rx/tx queue reset

2022-02-15 Thread Jason Wang
On Mon, Feb 14, 2022 at 4:14 PM Xuan Zhuo  wrote:
>
> This patch implements the reset function of the rx, tx queues.
>
> Based on this function, it is possible to modify the ring num of the
> queue. And quickly recycle the buffer in the queue.
>
> In the process of the queue disable, in theory, as long as virtio
> supports queue reset, there will be no exceptions.
>
> However, in the process of the queue enable, there may be exceptions due to
> memory allocation.  In this case, vq is not available, but we still have
> to execute napi_enable(). Because napi_disable is similar to a lock,
> napi_enable must be called after calling napi_disable.
>
> Signed-off-by: Xuan Zhuo 
> ---
>  drivers/net/virtio_net.c | 123 +++
>  1 file changed, 123 insertions(+)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 9a1445236e23..a4ffd7cdf623 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -251,6 +251,11 @@ struct padded_vnet_hdr {
> char padding[4];
>  };
>
> +static void virtnet_sq_free_unused_bufs(struct virtnet_info *vi,
> +   struct send_queue *sq);
> +static void virtnet_rq_free_unused_bufs(struct virtnet_info *vi,
> +   struct receive_queue *rq);
> +
>  static bool is_xdp_frame(void *ptr)
>  {
> return (unsigned long)ptr & VIRTIO_XDP_FLAG;
> @@ -1369,6 +1374,9 @@ static void virtnet_napi_enable(struct virtqueue *vq, 
> struct napi_struct *napi)
>  {
> napi_enable(napi);
>
> +   if (vq->reset)
> +   return;
> +
> /* If all buffers were filled by other side before we napi_enabled, we
>  * won't get another interrupt, so process any outstanding packets 
> now.
>  * Call local_bh_enable after to trigger softIRQ processing.
> @@ -1413,6 +1421,10 @@ static void refill_work(struct work_struct *work)
> struct receive_queue *rq = &vi->rq[i];
>
> napi_disable(&rq->napi);
> +   if (rq->vq->reset) {
> +   virtnet_napi_enable(rq->vq, &rq->napi);
> +   continue;
> +   }
> still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
> virtnet_napi_enable(rq->vq, &rq->napi);
>
> @@ -1523,6 +1535,9 @@ static void virtnet_poll_cleantx(struct receive_queue 
> *rq)
> if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index))
> return;
>
> +   if (sq->vq->reset)
> +   return;
> +
> if (__netif_tx_trylock(txq)) {
> do {
> virtqueue_disable_cb(sq->vq);
> @@ -1769,6 +1784,114 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, 
> struct net_device *dev)
> return NETDEV_TX_OK;
>  }
>
> +static int virtnet_rx_vq_disable(struct virtnet_info *vi,
> +struct receive_queue *rq)
> +{
> +   int err;
> +
> +   napi_disable(&rq->napi);
> +
> +   err = virtio_reset_vq(rq->vq);
> +   if (err)
> +   goto err;
> +
> +   virtnet_rq_free_unused_bufs(vi, rq);
> +
> +   vring_release_virtqueue(rq->vq);
> +
> +   return 0;
> +
> +err:
> +   virtnet_napi_enable(rq->vq, &rq->napi);
> +   return err;
> +}
> +
> +static int virtnet_tx_vq_disable(struct virtnet_info *vi,
> +struct send_queue *sq)
> +{
> +   struct netdev_queue *txq;
> +   int err, qindex;
> +
> +   qindex = sq - vi->sq;
> +
> +   txq = netdev_get_tx_queue(vi->dev, qindex);
> +   __netif_tx_lock_bh(txq);
> +
> +   netif_stop_subqueue(vi->dev, qindex);
> +   virtnet_napi_tx_disable(&sq->napi);
> +
> +   err = virtio_reset_vq(sq->vq);
> +   if (err) {
> +   virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
> +   netif_start_subqueue(vi->dev, qindex);
> +
> +   __netif_tx_unlock_bh(txq);
> +   return err;
> +   }
> +   __netif_tx_unlock_bh(txq);
> +
> +   virtnet_sq_free_unused_bufs(vi, sq);
> +
> +   vring_release_virtqueue(sq->vq);
> +
> +   return 0;
> +}
> +
> +static int virtnet_tx_vq_enable(struct virtnet_info *vi, struct send_queue 
> *sq)
> +{
> +   int err;
> +
> +   err = virtio_enable_resetq(sq->vq);
> +   if (!err)
> +   netif_start_subqueue(vi->dev, sq - vi->sq);
> +
> +   virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
> +
> +   return err;
> +}
> +
> +static int virtnet_rx_vq_enable(struct virtnet_info *vi,
> +   struct receive_queue *rq)
> +{
> +   int err;

So the API should be designed in a consistent way.

In rx_vq_disable() we do:

reset()
detach_unused_bufs()
vring_release_virtqueue()

here it's better to do exactly the reverse:

vring_attach_virtqueue() // this is the helper I guess in patch 5,
reverse of the vring_release_virtqueue()
try_refill_recv() // reverse of the detach_unused_bufs()
enable_reset() // 
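
i.e. the enable side would read something like this (sketch;
vring_attach_virtqueue()/virtio_enable_resetq() are the names used in this
discussion, not merged code):

static int virtnet_rx_vq_enable(struct virtnet_info *vi,
				struct receive_queue *rq)
{
	int err;

	/* reverse of vring_release_virtqueue() */
	err = vring_attach_virtqueue(rq->vq);
	if (err)
		return err;

	/* reverse of virtnet_rq_free_unused_bufs() */
	if (!try_fill_recv(vi, rq, GFP_KERNEL))
		schedule_delayed_work(&vi->refill, 0);

	/* reverse of virtio_reset_vq() */
	err = virtio_enable_resetq(rq->vq);
	if (err)
		return err;

	virtnet_napi_enable(rq->vq, &rq->napi);
	return 0;
}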

Re: [PATCH v5 19/22] virtio: add helper virtio_set_max_ring_num()

2022-02-15 Thread Jason Wang
On Mon, Feb 14, 2022 at 4:15 PM Xuan Zhuo  wrote:
>
> Added helper virtio_set_max_ring_num() to set the upper limit of ring
> num when creating a virtqueue.
>
> Can be used to limit ring num before find_vqs() call. Or change ring num
> when re-enable reset queue.

Is there a chance that RX and TX may want different ring sizes? If
yes, it might be even better to have a per-vq limit via find_vqs()?

>
> Signed-off-by: Xuan Zhuo 
> ---
>  drivers/virtio/virtio_ring.c  |  6 ++
>  include/linux/virtio.h|  1 +
>  include/linux/virtio_config.h | 30 ++
>  3 files changed, 37 insertions(+)
>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 1a123b5e5371..a77a82883e44 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -943,6 +943,9 @@ static struct virtqueue *vring_create_virtqueue_split(
> size_t queue_size_in_bytes;
> struct vring vring;
>
> +   if (vdev->max_ring_num && num > vdev->max_ring_num)
> +   num = vdev->max_ring_num;
> +
> /* We assume num is a power of 2. */
> if (num & (num - 1)) {
> dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
> @@ -1692,6 +1695,9 @@ static struct virtqueue *vring_create_virtqueue_packed(
> dma_addr_t ring_dma_addr, driver_event_dma_addr, 
> device_event_dma_addr;
> size_t ring_size_in_bytes, event_size_in_bytes;
>
> +   if (vdev->max_ring_num && num > vdev->max_ring_num)
> +   num = vdev->max_ring_num;
> +
> ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
>
> ring = vring_alloc_queue(vdev, ring_size_in_bytes,
> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> index 1153b093c53d..45525beb2ec4 100644
> --- a/include/linux/virtio.h
> +++ b/include/linux/virtio.h
> @@ -127,6 +127,7 @@ struct virtio_device {
> struct list_head vqs;
> u64 features;
> void *priv;
> +   u16 max_ring_num;
>  };
>
>  static inline struct virtio_device *dev_to_virtio(struct device *_dev)
> diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
> index cd7f7f44ce38..d7cb2d0341ee 100644
> --- a/include/linux/virtio_config.h
> +++ b/include/linux/virtio_config.h
> @@ -200,6 +200,36 @@ static inline bool virtio_has_dma_quirk(const struct 
> virtio_device *vdev)
> return !virtio_has_feature(vdev, VIRTIO_F_ACCESS_PLATFORM);
>  }
>
> +/**
> + * virtio_set_max_ring_num - set max ring num
> + * @vdev: the device
> + * @num: max ring num. Zero clear the limit.
> + *
> + * When creating a virtqueue, use this value as the upper limit of ring num.
> + *
> + * Returns 0 on success or error status
> + */
> +static inline
> +int virtio_set_max_ring_num(struct virtio_device *vdev, u16 num)
> +{

Having a dedicated helper for a per-device parameter usually means the
use cases are greatly limited. For example, this seems like it can only be
used when DRIVER_OK is not set?

And in patch 17 this function is called even if we only modify the RX
size; this is probably another call for a more flexible API as I
suggest, like exporting a vring allocation/deallocation helper and extending
find_vqs().
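
One hypothetical shape of such an interface, purely for illustration (none
of these names exist today):

struct virtio_vq_config {
	vq_callback_t *callback;
	const char *name;
	bool ctx;
	u16 max_num;	/* 0 == no driver-imposed limit */
};

int virtio_find_vqs_cfg(struct virtio_device *vdev, unsigned int nvqs,
			struct virtqueue *vqs[],
			const struct virtio_vq_config cfg[]);

virtio-net could then ask for, say, a smaller RX ring than TX ring in one
call, and the transport would clamp each virtqueue individually instead of
relying on a device-wide field.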

Thanks


> +   if (!num) {
> +   vdev->max_ring_num = num;
> +   return 0;
> +   }
> +
> +   if (!virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
> +   if (!is_power_of_2(num)) {
> +   num = __rounddown_pow_of_two(num);
> +
> +   if (!num)
> +   return -EINVAL;
> +   }
> +   }
> +
> +   vdev->max_ring_num = num;
> +   return 0;
> +}
> +
>  static inline
>  struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev,
> vq_callback_t *c, const char *n)
> --
> 2.31.0
>



Re: [RFC v3 2/2] pmem: enable pmem_submit_bio for asynchronous flush

2022-02-15 Thread Dan Williams
On Tue, Jan 11, 2022 at 8:21 AM Pankaj Gupta
 wrote:
>
> Return from "pmem_submit_bio" when asynchronous flush is
> still in progress in other context.
>
> Signed-off-by: Pankaj Gupta 
> ---
>  drivers/nvdimm/pmem.c| 15 ---
>  drivers/nvdimm/region_devs.c |  4 +++-
>  2 files changed, 15 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
> index fe7ece1534e1..f20e30277a68 100644
> --- a/drivers/nvdimm/pmem.c
> +++ b/drivers/nvdimm/pmem.c
> @@ -201,8 +201,12 @@ static void pmem_submit_bio(struct bio *bio)
> struct pmem_device *pmem = bio->bi_bdev->bd_disk->private_data;
> struct nd_region *nd_region = to_region(pmem);
>
> -   if (bio->bi_opf & REQ_PREFLUSH)
> +   if (bio->bi_opf & REQ_PREFLUSH) {
> ret = nvdimm_flush(nd_region, bio);
> +   /* asynchronous flush completes in other context */

I think a negative error code is a confusing way to capture the case
of "bio successfully coalesced to previously pending flush request.
Perhaps reserve negative codes for failure, 0 for synchronously
completed, and > 0 for coalesced flush request.
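
In the caller that would read something like this (sketch of the suggested
convention, slotting into pmem_submit_bio(); not a complete patch):

	if (bio->bi_opf & REQ_PREFLUSH) {
		ret = nvdimm_flush(nd_region, bio);
		if (ret < 0) {
			/* real failure */
			bio->bi_status = errno_to_blk_status(ret);
		} else if (ret > 0) {
			/* coalesced: the flush work completes the bio */
			return;
		}
		/* ret == 0: flushed synchronously, carry on */
	}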

> +   if (ret == -EINPROGRESS)
> +   return;
> +   }
>
> do_acct = blk_queue_io_stat(bio->bi_bdev->bd_disk->queue);
> if (do_acct)
> @@ -222,13 +226,18 @@ static void pmem_submit_bio(struct bio *bio)
> if (do_acct)
> bio_end_io_acct(bio, start);
>
> -   if (bio->bi_opf & REQ_FUA)
> +   if (bio->bi_opf & REQ_FUA) {
> ret = nvdimm_flush(nd_region, bio);
> +   /* asynchronous flush completes in other context */
> +   if (ret == -EINPROGRESS)
> +   return;
> +   }
>
> if (ret)
> bio->bi_status = errno_to_blk_status(ret);
>
> -   bio_endio(bio);
> +   if (bio)
> +   bio_endio(bio);
>  }
>
>  static int pmem_rw_page(struct block_device *bdev, sector_t sector,
> diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
> index 9ccf3d608799..8512d2eaed4e 100644
> --- a/drivers/nvdimm/region_devs.c
> +++ b/drivers/nvdimm/region_devs.c
> @@ -1190,7 +1190,9 @@ int nvdimm_flush(struct nd_region *nd_region, struct 
> bio *bio)
> if (!nd_region->flush)
> rc = generic_nvdimm_flush(nd_region);
> else {
> -   if (nd_region->flush(nd_region, bio))
> +   rc = nd_region->flush(nd_region, bio);
> +   /* ongoing flush in other context */
> +   if (rc && rc != -EINPROGRESS)
> rc = -EIO;

Why change this to -EIO vs just let the error code through untranslated?

> }
>
> --
> 2.25.1
>
>


[PATCH] drm/virtio: Fix capset-id query size

2022-02-15 Thread Rob Clark
From: Rob Clark 

The UABI was already defined for pointer to 64b value, and all the
userspace users of this ioctl that I could find are already using a
uint64_t (but zeroing it out to work around kernel only copying 32b).
Unfortunately this ioctl doesn't have a length field, so out of paranoia
I restricted the change to copy 64b to the single 64b param that can be
queried.
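
For reference, the matching userspace side looks roughly like this
(minimal sketch; header paths and error handling depend on the project):

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <drm/virtgpu_drm.h>

static int virtgpu_get_capset_mask(int fd, uint64_t *mask)
{
	struct drm_virtgpu_getparam gp;

	memset(&gp, 0, sizeof(gp));
	*mask = 0;	/* older kernels only fill the low 32 bits */
	gp.param = VIRTGPU_PARAM_SUPPORTED_CAPSET_IDs;
	gp.value = (uint64_t)(uintptr_t)mask;

	return drmIoctl(fd, DRM_IOCTL_VIRTGPU_GETPARAM, &gp);
}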

Fixes: 78aa20fa4381 ("drm/virtio: implement context init: advertise feature to 
userspace")
Signed-off-by: Rob Clark 
---
 drivers/gpu/drm/virtio/virtgpu_ioctl.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c 
b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index 0f2f3f54dbf9..0158d27d5645 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -269,7 +269,8 @@ static int virtio_gpu_getparam_ioctl(struct drm_device 
*dev, void *data,
 {
struct virtio_gpu_device *vgdev = dev->dev_private;
struct drm_virtgpu_getparam *param = data;
-   int value;
+   int value, ret, sz = sizeof(int);
+   uint64_t value64;
 
switch (param->param) {
case VIRTGPU_PARAM_3D_FEATURES:
@@ -291,13 +292,20 @@ static int virtio_gpu_getparam_ioctl(struct drm_device 
*dev, void *data,
value = vgdev->has_context_init ? 1 : 0;
break;
case VIRTGPU_PARAM_SUPPORTED_CAPSET_IDs:
-   value = vgdev->capset_id_mask;
+   value64 = vgdev->capset_id_mask;
+   sz = sizeof(value64);
break;
default:
return -EINVAL;
}
-   if (copy_to_user(u64_to_user_ptr(param->value), &value, sizeof(int)))
-   return -EFAULT;
+
+   if (sz == sizeof(int)) {
+   if (copy_to_user(u64_to_user_ptr(param->value), &value, sz))
+   return -EFAULT;
+   } else {
+   if (copy_to_user(u64_to_user_ptr(param->value), &value64, sz))
+   return -EFAULT;
+   }
 
return 0;
 }
-- 
2.34.1



Re: [PATCH] drm/virtio: Fix capset-id query size

2022-02-15 Thread Gurchetan Singh
On Tue, Feb 15, 2022 at 5:15 PM Rob Clark  wrote:

> From: Rob Clark 
>
> The UABI was already defined for pointer to 64b value, and all the
> userspace users of this ioctl that I could find are already using a
> uint64_t (but zeroing it out to work around kernel only copying 32b).
> Unfortunately this ioctl doesn't have a length field, so out of paranoia
> I restricted the change to copy 64b to the single 64b param that can be
> queried.
>
> Fixes: 78aa20fa4381 ("drm/virtio: implement context init: advertise
> feature to userspace")
> Signed-off-by: Rob Clark 
>

Reviewed-by: Gurchetan Singh 


> ---
>  drivers/gpu/drm/virtio/virtgpu_ioctl.c | 16 
>  1 file changed, 12 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
> b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
> index 0f2f3f54dbf9..0158d27d5645 100644
> --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
> +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
> @@ -269,7 +269,8 @@ static int virtio_gpu_getparam_ioctl(struct drm_device
> *dev, void *data,
>  {
> struct virtio_gpu_device *vgdev = dev->dev_private;
> struct drm_virtgpu_getparam *param = data;
> -   int value;
> +   int value, ret, sz = sizeof(int);
> +   uint64_t value64;
>
> switch (param->param) {
> case VIRTGPU_PARAM_3D_FEATURES:
> @@ -291,13 +292,20 @@ static int virtio_gpu_getparam_ioctl(struct
> drm_device *dev, void *data,
> value = vgdev->has_context_init ? 1 : 0;
> break;
> case VIRTGPU_PARAM_SUPPORTED_CAPSET_IDs:
> -   value = vgdev->capset_id_mask;
> +   value64 = vgdev->capset_id_mask;
> +   sz = sizeof(value64);
> break;
> default:
> return -EINVAL;
> }
> -   if (copy_to_user(u64_to_user_ptr(param->value), &value,
> sizeof(int)))
> -   return -EFAULT;
> +
> +   if (sz == sizeof(int)) {
> +   if (copy_to_user(u64_to_user_ptr(param->value), &value,
> sz))
> +   return -EFAULT;
> +   } else {
> +   if (copy_to_user(u64_to_user_ptr(param->value), &value64,
> sz))
> +   return -EFAULT;
> +   }
>
> return 0;
>  }
> --
> 2.34.1
>
>

Re: [PATCH v5 08/22] virtio_ring: queue_reset: add vring_release_virtqueue()

2022-02-15 Thread Jason Wang
On Mon, Feb 14, 2022 at 4:14 PM Xuan Zhuo  wrote:
>
> Added vring_release_virtqueue() to release the ring of the vq.
>
> In this process, vq is removed from the vdev->vqs queue. And the memory
> of the ring is released
>
> Signed-off-by: Xuan Zhuo 
> ---
>  drivers/virtio/virtio_ring.c | 18 +-
>  include/linux/virtio.h   | 12 
>  2 files changed, 29 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index c5dd17c7dd4a..b37753bdbbc4 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -1730,6 +1730,7 @@ static struct virtqueue *vring_create_virtqueue_packed(
> vq->vq.vdev = vdev;
> vq->vq.num_free = num;
> vq->vq.index = index;
> +   vq->vq.reset = VIRTQUEUE_RESET_STAGE_NONE;

So we don't have a similar check for detach_unused_buf(), I guess it
should be sufficient to document the API requirement. Otherwise we
probably need some barriers/ordering which are not worthwhile just for
figuring out bad API usage.

> vq->we_own_ring = true;
> vq->notify = notify;
> vq->weak_barriers = weak_barriers;
> @@ -2218,6 +2219,7 @@ static int __vring_init_virtqueue(struct virtqueue *_vq,
> vq->vq.vdev = vdev;
> vq->vq.num_free = vring.num;
> vq->vq.index = index;
> +   vq->vq.reset = VIRTQUEUE_RESET_STAGE_NONE;
> vq->we_own_ring = false;
> vq->notify = notify;
> vq->weak_barriers = weak_barriers;
> @@ -2397,11 +2399,25 @@ void vring_del_virtqueue(struct virtqueue *_vq)
>  {
> struct vring_virtqueue *vq = to_vvq(_vq);
>
> -   __vring_del_virtqueue(vq);
> +   if (_vq->reset != VIRTQUEUE_RESET_STAGE_RELEASE)
> +   __vring_del_virtqueue(vq);
> kfree(vq);
>  }
>  EXPORT_SYMBOL_GPL(vring_del_virtqueue);
>
> +void vring_release_virtqueue(struct virtqueue *_vq)
> +{

If we agree on that we need a allocation routine, we probably need to
rename this as vring_free_virtqueue()

Thanks

> +   struct vring_virtqueue *vq = to_vvq(_vq);
> +
> +   if (_vq->reset != VIRTQUEUE_RESET_STAGE_DEVICE)
> +   return;
> +
> +   __vring_del_virtqueue(vq);
> +
> +   _vq->reset = VIRTQUEUE_RESET_STAGE_RELEASE;
> +}
> +EXPORT_SYMBOL_GPL(vring_release_virtqueue);
> +
>  /* Manipulates transport-specific feature bits. */
>  void vring_transport_features(struct virtio_device *vdev)
>  {
> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> index 72292a62cd90..cdb2a551257c 100644
> --- a/include/linux/virtio.h
> +++ b/include/linux/virtio.h
> @@ -10,6 +10,12 @@
>  #include 
>  #include 
>
> +enum virtqueue_reset_stage {
> +   VIRTQUEUE_RESET_STAGE_NONE,
> +   VIRTQUEUE_RESET_STAGE_DEVICE,
> +   VIRTQUEUE_RESET_STAGE_RELEASE,
> +};
> +
>  /**
>   * virtqueue - a queue to register buffers for sending or receiving.
>   * @list: the chain of virtqueues for this device
> @@ -32,6 +38,7 @@ struct virtqueue {
> unsigned int index;
> unsigned int num_free;
> void *priv;
> +   enum virtqueue_reset_stage reset;
>  };
>
>  int virtqueue_add_outbuf(struct virtqueue *vq,
> @@ -196,4 +203,9 @@ void unregister_virtio_driver(struct virtio_driver *drv);
>  #define module_virtio_driver(__virtio_driver) \
> module_driver(__virtio_driver, register_virtio_driver, \
> unregister_virtio_driver)
> +/*
> + * Releases the ring of a virtqueue; it does not free the vq itself.
> + * This function must be called after reset_vq().
> + */
> +void vring_release_virtqueue(struct virtqueue *vq);
>  #endif /* _LINUX_VIRTIO_H */
> --
> 2.31.0
>



Re: [PATCH v5 22/22] virtio_net: support set_ringparam

2022-02-15 Thread Jason Wang
On Mon, Feb 14, 2022 at 4:15 PM Xuan Zhuo  wrote:
>
> Support set_ringparam based on virtio queue reset.
>
> The rx_pending and tx_pending values passed in must be a power of 2.
>
> Signed-off-by: Xuan Zhuo 
> ---
>  drivers/net/virtio_net.c | 50 
>  1 file changed, 50 insertions(+)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index f9bb760c6dbd..bf460ea87354 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -2308,6 +2308,55 @@ static void virtnet_get_ringparam(struct net_device 
> *dev,
> ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq);
>  }
>
> +static int virtnet_set_ringparam(struct net_device *dev,
> +struct ethtool_ringparam *ring,
> +struct kernel_ethtool_ringparam *kernel_ring,
> +struct netlink_ext_ack *extack)
> +{
> +   struct virtnet_info *vi = netdev_priv(dev);
> +   u32 rx_pending, tx_pending;
> +   int i, err;
> +
> +   if (ring->rx_mini_pending || ring->rx_jumbo_pending)
> +   return -EINVAL;
> +
> +   rx_pending = virtqueue_get_vring_size(vi->rq[0].vq);
> +   tx_pending = virtqueue_get_vring_size(vi->sq[0].vq);
> +
> +   if (ring->rx_pending == rx_pending &&
> +   ring->tx_pending == tx_pending)
> +   return 0;
> +
> +   if (ring->rx_pending > virtqueue_get_vring_max_size(vi->rq[0].vq))
> +   return -EINVAL;
> +
> +   if (ring->tx_pending > virtqueue_get_vring_max_size(vi->sq[0].vq))
> +   return -EINVAL;
> +
> +   if (!is_power_of_2(ring->rx_pending))
> +   return -EINVAL;
> +
> +   if (!is_power_of_2(ring->tx_pending))
> +   return -EINVAL;

We'd better leave those checks to the virtio core, which knows that
packed virtqueues don't have this limitation.
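
A rough sketch of what such a core-side check could look like
(virtio_check_ring_num() is a hypothetical helper name, not something in
this series):

static int virtio_check_ring_num(struct virtio_device *vdev, u32 num, u32 max)
{
        if (!num || num > max)
                return -EINVAL;

        /* only split virtqueues require a power-of-2 ring size */
        if (!virtio_has_feature(vdev, VIRTIO_F_RING_PACKED) &&
            !is_power_of_2(num))
                return -EINVAL;

        return 0;
}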

> +
> +   for (i = 0; i < vi->max_queue_pairs; i++) {
> +   if (ring->tx_pending != tx_pending) {
> +   virtio_set_max_ring_num(vi->vdev, ring->tx_pending);

The name is kind of confusing; I guess this should not be the maximum
ring num. Also, this needs to be done after the reset, and it would be
even better to disallow such a change while the virtqueue has not been
reset.
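
One way to disallow the change while the queue is live, sketched against
the reset stages introduced earlier in this series (the helper name is
hypothetical):

/* hypothetical guard: only accept a new ring size once the old ring has
 * been released via vring_release_virtqueue() */
static int virtqueue_check_resizable(struct virtqueue *vq)
{
        if (vq->reset != VIRTQUEUE_RESET_STAGE_RELEASE)
                return -EBUSY;
        return 0;
}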

> +   err = virtnet_tx_vq_reset(vi, i);
> +   if (err)
> +   return err;
> +   }
> +
> +   if (ring->rx_pending != rx_pending) {
> +   virtio_set_max_ring_num(vi->vdev, ring->rx_pending);
> +   err = virtnet_rx_vq_reset(vi, i);
> +   if (err)
> +   return err;
> +   }
> +   }
> +
> +   return 0;
> +}
>
>  static void virtnet_get_drvinfo(struct net_device *dev,
> struct ethtool_drvinfo *info)
> @@ -2541,6 +2590,7 @@ static const struct ethtool_ops virtnet_ethtool_ops = {
> .get_drvinfo = virtnet_get_drvinfo,
> .get_link = ethtool_op_get_link,
> .get_ringparam = virtnet_get_ringparam,
> +   .set_ringparam = virtnet_set_ringparam,
> .get_strings = virtnet_get_strings,
> .get_sset_count = virtnet_get_sset_count,
> .get_ethtool_stats = virtnet_get_ethtool_stats,
> --
> 2.31.0
>



Re: [PATCH v5 22/22] virtio_net: support set_ringparam

2022-02-15 Thread Xuan Zhuo
On Wed, 16 Feb 2022 12:14:39 +0800, Jason Wang  wrote:
> On Mon, Feb 14, 2022 at 4:15 PM Xuan Zhuo  wrote:
> >
> > Support set_ringparam based on virtio queue reset.
> >
> > The rx_pending and tx_pending values passed in must be a power of 2.
> >
> > Signed-off-by: Xuan Zhuo 
> > ---
> >  drivers/net/virtio_net.c | 50 
> >  1 file changed, 50 insertions(+)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index f9bb760c6dbd..bf460ea87354 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -2308,6 +2308,55 @@ static void virtnet_get_ringparam(struct net_device 
> > *dev,
> > ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq);
> >  }
> >
> > +static int virtnet_set_ringparam(struct net_device *dev,
> > +struct ethtool_ringparam *ring,
> > +struct kernel_ethtool_ringparam 
> > *kernel_ring,
> > +struct netlink_ext_ack *extack)
> > +{
> > +   struct virtnet_info *vi = netdev_priv(dev);
> > +   u32 rx_pending, tx_pending;
> > +   int i, err;
> > +
> > +   if (ring->rx_mini_pending || ring->rx_jumbo_pending)
> > +   return -EINVAL;
> > +
> > +   rx_pending = virtqueue_get_vring_size(vi->rq[0].vq);
> > +   tx_pending = virtqueue_get_vring_size(vi->sq[0].vq);
> > +
> > +   if (ring->rx_pending == rx_pending &&
> > +   ring->tx_pending == tx_pending)
> > +   return 0;
> > +
> > +   if (ring->rx_pending > virtqueue_get_vring_max_size(vi->rq[0].vq))
> > +   return -EINVAL;
> > +
> > +   if (ring->tx_pending > virtqueue_get_vring_max_size(vi->sq[0].vq))
> > +   return -EINVAL;
> > +
> > +   if (!is_power_of_2(ring->rx_pending))
> > +   return -EINVAL;
> > +
> > +   if (!is_power_of_2(ring->tx_pending))
> > +   return -EINVAL;
>
> We'd better leave those checks to the virtio core, which knows that
> packed virtqueues don't have this limitation.

OK.

>
> > +
> > +   for (i = 0; i < vi->max_queue_pairs; i++) {
> > +   if (ring->tx_pending != tx_pending) {
> > +   virtio_set_max_ring_num(vi->vdev, ring->tx_pending);
>
> The name is kind of confusing; I guess this should not be the maximum
> ring num. Also, this needs to be done after the reset, and it would be
> even better to disallow such a change while the virtqueue has not been
> reset.

OK.

Thanks.

>
> > +   err = virtnet_tx_vq_reset(vi, i);
> > +   if (err)
> > +   return err;
> > +   }
> > +
> > +   if (ring->rx_pending != rx_pending) {
> > +   virtio_set_max_ring_num(vi->vdev, ring->rx_pending);
> > +   err = virtnet_rx_vq_reset(vi, i);
> > +   if (err)
> > +   return err;
> > +   }
> > +   }
> > +
> > +   return 0;
> > +}
> >
> >  static void virtnet_get_drvinfo(struct net_device *dev,
> > struct ethtool_drvinfo *info)
> > @@ -2541,6 +2590,7 @@ static const struct ethtool_ops virtnet_ethtool_ops = 
> > {
> > .get_drvinfo = virtnet_get_drvinfo,
> > .get_link = ethtool_op_get_link,
> > .get_ringparam = virtnet_get_ringparam,
> > +   .set_ringparam = virtnet_set_ringparam,
> > .get_strings = virtnet_get_strings,
> > .get_sset_count = virtnet_get_sset_count,
> > .get_ethtool_stats = virtnet_get_ethtool_stats,
> > --
> > 2.31.0
> >
>


Re: [PATCH v5 20/22] virtio_net: set the default max ring num

2022-02-15 Thread Xuan Zhuo
On Wed, 16 Feb 2022 12:14:31 +0800, Jason Wang  wrote:
> On Mon, Feb 14, 2022 at 4:14 PM Xuan Zhuo  wrote:
> >
> > Set the default maximum ring num via virtio_set_max_ring_num().
> >
> > The default maximum ring num is 1024.
>
> Having a default value is pretty useful; I see 32K is used by default
> for IFCVF.
>
> Rethinking this, how about having different default values based on the
> link speed?
>
> Without SPEED_DUPLEX, we use 1024. Otherwise
>
> 10g 4096
> 40g 8192

By the way, we can define different default values for tx and rx. That
way I can just use them in the new find_vqs() interface.

Without SPEED_DUPLEX: tx 512, rx 1024.

Thanks.
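
For illustration, the speed-based defaults floated in this thread could be
tabulated roughly as below. This is a sketch only: virtnet_default_ring_num()
is a made-up name, and it assumes the existing vi->speed field that
virtnet_update_settings() fills in from VIRTIO_NET_F_SPEED_DUPLEX:

static u16 virtnet_default_ring_num(struct virtnet_info *vi, bool is_rx)
{
        switch (vi->speed) {
        case SPEED_10000:
                return 4096;
        case SPEED_40000:
                return 8192;
        default:
                /* no speed/duplex info: tx 512, rx 1024 as proposed above */
                return is_rx ? 1024 : 512;
        }
}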


>
> etc.
>
> (These numbers are just copied from the 10g/40g default parameters of
> other vendors.)
>
> Thanks
>
> >
> > Signed-off-by: Xuan Zhuo 
> > ---
> >  drivers/net/virtio_net.c | 4 
> >  1 file changed, 4 insertions(+)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index a4ffd7cdf623..77e61fe0b2ce 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -35,6 +35,8 @@ module_param(napi_tx, bool, 0644);
> >  #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
> >  #define GOOD_COPY_LEN  128
> >
> > +#define VIRTNET_DEFAULT_MAX_RING_NUM 1024
> > +
> >  #define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
> >
> >  /* Amount of XDP headroom to prepend to packets for use by xdp_adjust_head 
> > */
> > @@ -3045,6 +3047,8 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
> > ctx[rxq2vq(i)] = true;
> > }
> >
> > +   virtio_set_max_ring_num(vi->vdev, VIRTNET_DEFAULT_MAX_RING_NUM);
> > +
> > ret = virtio_find_vqs_ctx(vi->vdev, total_vqs, vqs, callbacks,
> >   names, ctx, NULL);
> > if (ret)
> > --
> > 2.31.0
> >
>


Re: [PATCH v5 19/22] virtio: add helper virtio_set_max_ring_num()

2022-02-15 Thread Xuan Zhuo
On Wed, 16 Feb 2022 12:14:04 +0800, Jason Wang  wrote:
> On Mon, Feb 14, 2022 at 4:15 PM Xuan Zhuo  wrote:
> >
> > Add a helper, virtio_set_max_ring_num(), to set the upper limit of the
> > ring num used when creating a virtqueue.
> >
> > It can be used to limit the ring num before calling find_vqs(), or to
> > change the ring num when re-enabling a reset queue.
>
> Is there a chance that RX and TX may want different ring sizes? If so,
> it might be even better to have a per-vq limit via find_vqs()?
>
> >
> > Signed-off-by: Xuan Zhuo 
> > ---
> >  drivers/virtio/virtio_ring.c  |  6 ++
> >  include/linux/virtio.h|  1 +
> >  include/linux/virtio_config.h | 30 ++
> >  3 files changed, 37 insertions(+)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index 1a123b5e5371..a77a82883e44 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -943,6 +943,9 @@ static struct virtqueue *vring_create_virtqueue_split(
> > size_t queue_size_in_bytes;
> > struct vring vring;
> >
> > +   if (vdev->max_ring_num && num > vdev->max_ring_num)
> > +   num = vdev->max_ring_num;
> > +
> > /* We assume num is a power of 2. */
> > if (num & (num - 1)) {
> > dev_warn(>dev, "Bad virtqueue length %u\n", num);
> > @@ -1692,6 +1695,9 @@ static struct virtqueue 
> > *vring_create_virtqueue_packed(
> > dma_addr_t ring_dma_addr, driver_event_dma_addr, 
> > device_event_dma_addr;
> > size_t ring_size_in_bytes, event_size_in_bytes;
> >
> > +   if (vdev->max_ring_num && num > vdev->max_ring_num)
> > +   num = vdev->max_ring_num;
> > +
> > ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
> >
> > ring = vring_alloc_queue(vdev, ring_size_in_bytes,
> > diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> > index 1153b093c53d..45525beb2ec4 100644
> > --- a/include/linux/virtio.h
> > +++ b/include/linux/virtio.h
> > @@ -127,6 +127,7 @@ struct virtio_device {
> > struct list_head vqs;
> > u64 features;
> > void *priv;
> > +   u16 max_ring_num;
> >  };
> >
> >  static inline struct virtio_device *dev_to_virtio(struct device *_dev)
> > diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
> > index cd7f7f44ce38..d7cb2d0341ee 100644
> > --- a/include/linux/virtio_config.h
> > +++ b/include/linux/virtio_config.h
> > @@ -200,6 +200,36 @@ static inline bool virtio_has_dma_quirk(const struct 
> > virtio_device *vdev)
> > return !virtio_has_feature(vdev, VIRTIO_F_ACCESS_PLATFORM);
> >  }
> >
> > +/**
> > + * virtio_set_max_ring_num - set max ring num
> > + * @vdev: the device
> > + * @num: max ring num. Zero clears the limit.
> > + *
> > + * When creating a virtqueue, use this value as the upper limit of ring 
> > num.
> > + *
> > + * Returns 0 on success or error status
> > + */
> > +static inline
> > +int virtio_set_max_ring_num(struct virtio_device *vdev, u16 num)
> > +{
>
> Having a dedicated helper for a per-device parameter usually means the
> use cases are greatly limited. For example, it seems this can only be
> used while DRIVER_OK is not set?
>
> And in patch 17 this function is called even when we only modify the RX
> size. That is probably another argument for a more flexible API, as I
> suggested: exporting vring allocation/deallocation helpers and extending
> find_vqs().
>

I understand.

Thanks.
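
A rough sketch of the more flexible direction discussed here: pass
per-virtqueue size limits through find_vqs() instead of a per-device field.
The extra "sizes" parameter is hypothetical and not part of this series;
sizes[i] == 0 could mean "use the transport default" so existing callers
keep working:

int virtio_find_vqs_ctx_size(struct virtio_device *vdev, unsigned int nvqs,
                             struct virtqueue *vqs[],
                             vq_callback_t *callbacks[],
                             const char * const names[],
                             const bool *ctx, const u32 *sizes,
                             struct irq_affinity *desc);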

> Thanks
>
>
> > +   if (!num) {
> > +   vdev->max_ring_num = num;
> > +   return 0;
> > +   }
> > +
> > +   if (!virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
> > +   if (!is_power_of_2(num)) {
> > +   num = __rounddown_pow_of_two(num);
> > +
> > +   if (!num)
> > +   return -EINVAL;
> > +   }
> > +   }
> > +
> > +   vdev->max_ring_num = num;
> > +   return 0;
> > +}
> > +
> >  static inline
> >  struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev,
> > vq_callback_t *c, const char *n)
> > --
> > 2.31.0
> >
>