Re: [PATCH v7 17/26] virtio_pci: queue_reset: support VIRTIO_F_RING_RESET
在 2022/3/10 下午4:20, Xuan Zhuo 写道: On Wed, 9 Mar 2022 16:54:10 +0800, Jason Wang wrote: 在 2022/3/8 下午8:35, Xuan Zhuo 写道: This patch implements virtio pci support for QUEUE RESET. Performing reset on a queue is divided into these steps: 1. virtio_reset_vq() - notify the device to reset the queue 2. virtqueue_detach_unused_buf() - recycle the buffer submitted 3. virtqueue_reset_vring()- reset the vring (may re-alloc) 4. virtio_enable_resetq() - mmap vring to device, and enable the queue This patch implements virtio_reset_vq(), virtio_enable_resetq() in the pci scenario. Signed-off-by: Xuan Zhuo --- drivers/virtio/virtio_pci_common.c | 8 +-- drivers/virtio/virtio_pci_modern.c | 83 ++ 2 files changed, 88 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index fdbde1db5ec5..863d3a8a0956 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -248,9 +248,11 @@ static void vp_del_vq(struct virtqueue *vq) struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index]; unsigned long flags; - spin_lock_irqsave(_dev->lock, flags); - list_del(>node); - spin_unlock_irqrestore(_dev->lock, flags); + if (!vq->reset) { + spin_lock_irqsave(_dev->lock, flags); + list_del(>node); + spin_unlock_irqrestore(_dev->lock, flags); + } vp_dev->del_vq(info); kfree(info); diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 49a4493732cf..3c67d3607802 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -34,6 +34,9 @@ static void vp_transport_features(struct virtio_device *vdev, u64 features) if ((features & BIT_ULL(VIRTIO_F_SR_IOV)) && pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV)) __virtio_set_bit(vdev, VIRTIO_F_SR_IOV); + + if (features & BIT_ULL(VIRTIO_F_RING_RESET)) + __virtio_set_bit(vdev, VIRTIO_F_RING_RESET); } /* virtio config->finalize_features() implementation */ @@ -199,6 +202,82 @@ static int 
vp_active_vq(struct virtqueue *vq, u16 msix_vec) return 0; } +static int vp_modern_reset_vq(struct virtqueue *vq) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); + struct virtio_pci_modern_device *mdev = _dev->mdev; + struct virtio_pci_vq_info *info; + unsigned long flags; + unsigned int irq; + + if (!virtio_has_feature(vq->vdev, VIRTIO_F_RING_RESET)) + return -ENOENT; + + vp_modern_set_queue_reset(mdev, vq->index); + + info = vp_dev->vqs[vq->index]; + + /* delete vq from irq handler */ + spin_lock_irqsave(_dev->lock, flags); + list_del(>node); + spin_unlock_irqrestore(_dev->lock, flags); + + INIT_LIST_HEAD(>node); + + vq->reset = VIRTIO_VQ_RESET_STEP_DEVICE; + + /* sync irq callback. */ + if (vp_dev->intx_enabled) { + irq = vp_dev->pci_dev->irq; + + } else { + if (info->msix_vector == VIRTIO_MSI_NO_VECTOR) + return 0; + + irq = pci_irq_vector(vp_dev->pci_dev, info->msix_vector); + } + + synchronize_irq(irq); Synchronize_irq() is not sufficient here since it breaks the effort of the interrupt hardening which is done by commits: 080cd7c3ac87 virtio-pci: harden INTX interrupts 9e35276a5344 virtio_pci: harden MSI-X interrupts Unfortunately 080cd7c3ac87 introduces an issue that disable_irq() were used for the affinity managed irq but we're discussing a fix. ok, I think disable_irq() is still used here. I want to determine the solution for this detail first. So I posted the code, I hope Jason can help confirm this point first. There are three situations in which vq corresponds to an interrupt 1. intx 2. msix: per vq vectors 2. msix: share irq Essentially can be divided into two categories: per vq vectors and share irq. For share irq is based on virtqueues to find vq, so I think it is safe as long as list_del() is executed under the protection of the lock. In the case of per vq vectors, disable_irq() is used. See the discussion here[1], disable_irq() could be problematic for the block and scsi device that using affinity managed irq. 
We're waiting for the IRQ maintainer to comment on a solution. Other looks sane. Thanks [1] https://lkml.org/lkml/2022/3/8/743 Thanks. +static int vp_modern_reset_vq(struct virtqueue *vq) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); + struct virtio_pci_modern_device *mdev = _dev->mdev; + struct virtio_pci_vq_info *info; + unsigned long flags; + unsigned int irq; + + if (!virtio_has_feature(vq->vdev, VIRTIO_F_RING_RESET)) + return -ENOENT; + +
Re: [PATCH v7 17/26] virtio_pci: queue_reset: support VIRTIO_F_RING_RESET
在 2022/3/9 下午5:32, Xuan Zhuo 写道: On Wed, 9 Mar 2022 16:54:10 +0800, Jason Wang wrote: 在 2022/3/8 下午8:35, Xuan Zhuo 写道: This patch implements virtio pci support for QUEUE RESET. Performing reset on a queue is divided into these steps: 1. virtio_reset_vq() - notify the device to reset the queue 2. virtqueue_detach_unused_buf() - recycle the buffer submitted 3. virtqueue_reset_vring()- reset the vring (may re-alloc) 4. virtio_enable_resetq() - mmap vring to device, and enable the queue This patch implements virtio_reset_vq(), virtio_enable_resetq() in the pci scenario. Signed-off-by: Xuan Zhuo --- drivers/virtio/virtio_pci_common.c | 8 +-- drivers/virtio/virtio_pci_modern.c | 83 ++ 2 files changed, 88 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index fdbde1db5ec5..863d3a8a0956 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -248,9 +248,11 @@ static void vp_del_vq(struct virtqueue *vq) struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index]; unsigned long flags; - spin_lock_irqsave(_dev->lock, flags); - list_del(>node); - spin_unlock_irqrestore(_dev->lock, flags); + if (!vq->reset) { + spin_lock_irqsave(_dev->lock, flags); + list_del(>node); + spin_unlock_irqrestore(_dev->lock, flags); + } vp_dev->del_vq(info); kfree(info); diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 49a4493732cf..3c67d3607802 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -34,6 +34,9 @@ static void vp_transport_features(struct virtio_device *vdev, u64 features) if ((features & BIT_ULL(VIRTIO_F_SR_IOV)) && pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV)) __virtio_set_bit(vdev, VIRTIO_F_SR_IOV); + + if (features & BIT_ULL(VIRTIO_F_RING_RESET)) + __virtio_set_bit(vdev, VIRTIO_F_RING_RESET); } /* virtio config->finalize_features() implementation */ @@ -199,6 +202,82 @@ static int 
vp_active_vq(struct virtqueue *vq, u16 msix_vec) return 0; } +static int vp_modern_reset_vq(struct virtqueue *vq) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); + struct virtio_pci_modern_device *mdev = _dev->mdev; + struct virtio_pci_vq_info *info; + unsigned long flags; + unsigned int irq; + + if (!virtio_has_feature(vq->vdev, VIRTIO_F_RING_RESET)) + return -ENOENT; + + vp_modern_set_queue_reset(mdev, vq->index); + + info = vp_dev->vqs[vq->index]; + + /* delete vq from irq handler */ + spin_lock_irqsave(_dev->lock, flags); + list_del(>node); + spin_unlock_irqrestore(_dev->lock, flags); + + INIT_LIST_HEAD(>node); + + vq->reset = VIRTIO_VQ_RESET_STEP_DEVICE; + + /* sync irq callback. */ + if (vp_dev->intx_enabled) { + irq = vp_dev->pci_dev->irq; + + } else { + if (info->msix_vector == VIRTIO_MSI_NO_VECTOR) + return 0; + + irq = pci_irq_vector(vp_dev->pci_dev, info->msix_vector); + } + + synchronize_irq(irq); Synchronize_irq() is not sufficient here since it breaks the effort of the interrupt hardening which is done by commits: 080cd7c3ac87 virtio-pci: harden INTX interrupts 9e35276a5344 virtio_pci: harden MSI-X interrupts Unfortunately 080cd7c3ac87 introduces an issue that disable_irq() were used for the affinity managed irq but we're discussing a fix. I need to understand it first. 
+ + return 0; +} + +static int vp_modern_enable_reset_vq(struct virtqueue *vq) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); + struct virtio_pci_modern_device *mdev = _dev->mdev; + struct virtio_pci_vq_info *info; + unsigned long flags, index; + int err; + + if (vq->reset != VIRTIO_VQ_RESET_STEP_VRING_ATTACH) + return -EBUSY; + + index = vq->index; + info = vp_dev->vqs[index]; + + /* check queue reset status */ + if (vp_modern_get_queue_reset(mdev, index) != 1) + return -EBUSY; + + err = vp_active_vq(vq, info->msix_vector); + if (err) + return err; + + if (vq->callback) { + spin_lock_irqsave(_dev->lock, flags); + list_add(>node, _dev->virtqueues); + spin_unlock_irqrestore(_dev->lock, flags); + } else { + INIT_LIST_HEAD(>node); + } + + vp_modern_set_queue_enable(_dev->mdev, index, true); Any reason we need to check queue_enable() here? The purpose of this function is to enable a reset vq, so call queue_enable() to activate it. Ok, this is what spec mandate.
Re: [PATCH v7 09/26] virtio_ring: split: implement virtqueue_reset_vring_split()
在 2022/3/10 下午12:46, Xuan Zhuo 写道: On Wed, 9 Mar 2022 15:55:44 +0800, Jason Wang wrote: 在 2022/3/8 下午8:35, Xuan Zhuo 写道: virtio ring supports reset. Queue reset is divided into several stages. 1. notify device queue reset 2. vring release 3. attach new vring 4. notify device queue re-enable After the first step is completed, the vring reset operation can be performed. If the newly set vring num does not change, then just reset the vq related value. Otherwise, the vring will be released and the vring will be reallocated. And the vring will be attached to the vq. If this process fails, the function will exit, and the state of the vq will be the vring release state. You can call this function again to reallocate the vring. In addition, vring_align, may_reduce_num are necessary for reallocating vring, so they are retained when creating vq. Signed-off-by: Xuan Zhuo --- drivers/virtio/virtio_ring.c | 69 1 file changed, 69 insertions(+) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index e0422c04c903..148fb1fd3d5a 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -158,6 +158,12 @@ struct vring_virtqueue { /* DMA address and size information */ dma_addr_t queue_dma_addr; size_t queue_size_in_bytes; + + /* The parameters for creating vrings are reserved for +* creating new vrings when enabling reset queue. +*/ + u32 vring_align; + bool may_reduce_num; } split; /* Available for packed ring */ @@ -217,6 +223,12 @@ struct vring_virtqueue { #endif }; +static void vring_free(struct virtqueue *vq); +static void __vring_virtqueue_init_split(struct vring_virtqueue *vq, +struct virtio_device *vdev); +static int __vring_virtqueue_attach_split(struct vring_virtqueue *vq, + struct virtio_device *vdev, + struct vring vring); /* * Helpers. 
@@ -1012,6 +1024,8 @@ static struct virtqueue *vring_create_virtqueue_split( return NULL; } + to_vvq(vq)->split.vring_align = vring_align; + to_vvq(vq)->split.may_reduce_num = may_reduce_num; to_vvq(vq)->split.queue_dma_addr = vring.dma_addr; to_vvq(vq)->split.queue_size_in_bytes = vring.queue_size_in_bytes; to_vvq(vq)->we_own_ring = true; @@ -1019,6 +1033,59 @@ static struct virtqueue *vring_create_virtqueue_split( return vq; } +static int virtqueue_reset_vring_split(struct virtqueue *_vq, u32 num) +{ So what this function does is to resize the virtqueue actually, I suggest to rename it as virtqueue_resize_split(). In addition to resize, when num is 0, the function is to reinitialize vq ring related variables. For example avail_idx_shadow. We need to move those logic to virtio_reset_vq() (I think we agree to have a better name of it). So I think 'reset' is more appropriate. The name is confusing at least to me, since we've already had virtio_reset_vq() and most of the logic is to do the resize. Thanks Thanks. + struct vring_virtqueue *vq = to_vvq(_vq); + struct virtio_device *vdev = _vq->vdev; + struct vring_split vring; + int err; + + if (num > _vq->num_max) + return -E2BIG; + + switch (vq->vq.reset) { + case VIRTIO_VQ_RESET_STEP_NONE: + return -ENOENT; + + case VIRTIO_VQ_RESET_STEP_VRING_ATTACH: + case VIRTIO_VQ_RESET_STEP_DEVICE: + if (vq->split.vring.num == num || !num) + break; + + vring_free(_vq); + + fallthrough; + + case VIRTIO_VQ_RESET_STEP_VRING_RELEASE: + if (!num) + num = vq->split.vring.num; + + err = vring_create_vring_split(, vdev, + vq->split.vring_align, + vq->weak_barriers, + vq->split.may_reduce_num, num); + if (err) + return -ENOMEM; We'd better need a safe fallback here like: If we can't allocate new memory, we can keep using the current one. Otherwise an ethtool -G fail may make the device not usable. This could be done by not freeing the old vring and virtqueue states until new is allocated. 
+
+	err = __vring_virtqueue_attach_split(vq, vdev, vring.vring);
+	if (err) {
+		vring_free_queue(vdev, vring.queue_size_in_bytes,
+				 vring.queue,
+				 vring.dma_addr);
+		return -ENOMEM;
+	}
+
Re: [PATCH 1/2] vsock: each transport cycles only on its own sockets
Hi Jiyong, Thank you for the patch! Yet something to improve: [auto build test ERROR on 3bf7edc84a9eb4007dd9a0cb8878a7e1d5ec6a3b] url: https://github.com/0day-ci/linux/commits/Jiyong-Park/vsock-cycle-only-on-its-own-socket/20220310-205638 base: 3bf7edc84a9eb4007dd9a0cb8878a7e1d5ec6a3b config: x86_64-rhel-8.3 (https://download.01.org/0day-ci/archive/20220311/202203111023.spyfgn7w-...@intel.com/config) compiler: gcc-9 (Ubuntu 9.4.0-1ubuntu1~20.04) 9.4.0 reproduce (this is a W=1 build): # https://github.com/0day-ci/linux/commit/6219060e1d706d7055fb0829b3bf23c5ae84790e git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Jiyong-Park/vsock-cycle-only-on-its-own-socket/20220310-205638 git checkout 6219060e1d706d7055fb0829b3bf23c5ae84790e # save the config file to linux build tree mkdir build_dir make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash net/vmw_vsock/ If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot All errors (new ones prefixed by >>): net/vmw_vsock/vmci_transport.c: In function 'vmci_transport_handle_detach': >> net/vmw_vsock/vmci_transport.c:808:25: error: 'vmci_transport' undeclared >> (first use in this function) 808 | if (vsk->transport != _transport) | ^~ net/vmw_vsock/vmci_transport.c:808:25: note: each undeclared identifier is reported only once for each function it appears in vim +/vmci_transport +808 net/vmw_vsock/vmci_transport.c 800 801 static void vmci_transport_handle_detach(struct sock *sk) 802 { 803 struct vsock_sock *vsk; 804 805 vsk = vsock_sk(sk); 806 807 /* Only handle our own sockets */ > 808 if (vsk->transport != _transport) 809 return; 810 811 if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) { 812 sock_set_flag(sk, SOCK_DONE); 813 814 /* On a detach the peer will not be sending or receiving 815 * anymore. 
816 */ 817 vsk->peer_shutdown = SHUTDOWN_MASK; 818 819 /* We should not be sending anymore since the peer won't be 820 * there to receive, but we can still receive if there is data 821 * left in our consume queue. If the local endpoint is a host, 822 * we can't call vsock_stream_has_data, since that may block, 823 * but a host endpoint can't read data once the VM has 824 * detached, so there is no available data in that case. 825 */ 826 if (vsk->local_addr.svm_cid == VMADDR_CID_HOST || 827 vsock_stream_has_data(vsk) <= 0) { 828 if (sk->sk_state == TCP_SYN_SENT) { 829 /* The peer may detach from a queue pair while 830 * we are still in the connecting state, i.e., 831 * if the peer VM is killed after attaching to 832 * a queue pair, but before we complete the 833 * handshake. In that case, we treat the detach 834 * event like a reset. 835 */ 836 837 sk->sk_state = TCP_CLOSE; 838 sk->sk_err = ECONNRESET; 839 sk_error_report(sk); 840 return; 841 } 842 sk->sk_state = TCP_CLOSE; 843 } 844 sk->sk_state_change(sk); 845 } 846 } 847 --- 0-DAY CI Kernel Test Service https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v1 0/5] Add memory shrinker to VirtIO-GPU DRM driver
Hi Am 09.03.22 um 23:25 schrieb Dmitry Osipenko: The reason for this work is to keep GEM shmem pages mapped and allocated even while the BO is neither mapped nor pinned. As it is now, GEM SHMEM creates and releases pages on each pin and unpin, and maps and unmaps memory ranges on each vmap and vunmap. It's all wasteful. Only the first pin and vmap calls should establish pages and mappings and only the purge and free functions should release them. Hm, aren't maps and pins already refcounted? They are. But even when the refcounter reaches 0 on deref, there's no need to remove the mapping or free the memory pages. We can keep them around for the next ref operation. Only the shrinker's purge or freeing the object has to do such clean-up operations. Such behavior is supported by TTM and we already use it in VRAM helpers as well. Best regards Thomas The patchset adds new helpers for BO purging to struct drm_gem_object_funcs. With this, I think it might be possible to have one global DRM shrinker and let it handle all BOs; independent of each BO's memory manager. Thank you, I'll give it a try. -- Thomas Zimmermann Graphics Driver Developer SUSE Software Solutions Germany GmbH Maxfeldstr. 5, 90409 Nürnberg, Germany (HRB 36809, AG Nürnberg) Geschäftsführer: Ivo Totev OpenPGP_signature Description: OpenPGP digital signature ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v3] vsock: each transport cycles only on its own sockets
On Thu, Mar 10, 2022 at 03:14:20PM +0100, Stefano Garzarella wrote: > On Thu, Mar 10, 2022 at 10:50:11PM +0900, Jiyong Park wrote: > > When iterating over sockets using vsock_for_each_connected_socket, make > > sure that a transport filters out sockets that don't belong to the > > transport. > > > > There actually was an issue caused by this; in a nested VM > > configuration, destroying the nested VM (which often involves the > > closing of /dev/vhost-vsock if there was h2g connections to the nested > > VM) kills not only the h2g connections, but also all existing g2h > > connections to the (outmost) host which are totally unrelated. > > > > Tested: Executed the following steps on Cuttlefish (Android running on a > > VM) [1]: (1) Enter into an `adb shell` session - to have a g2h > > connection inside the VM, (2) open and then close /dev/vhost-vsock by > > `exec 3< /dev/vhost-vsock && exec 3<&-`, (3) observe that the adb > > session is not reset. > > > > [1] https://android.googlesource.com/device/google/cuttlefish/ > > > > Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") > > Signed-off-by: Jiyong Park > > --- > > Changes in v3: > > - Fixed the build error in vmci_transport.c > > Changes in v2: > > - Squashed into a single patch > > > > drivers/vhost/vsock.c| 3 ++- > > include/net/af_vsock.h | 3 ++- > > net/vmw_vsock/af_vsock.c | 9 +++-- > > net/vmw_vsock/virtio_transport.c | 7 +-- > > net/vmw_vsock/vmci_transport.c | 5 - > > 5 files changed, 20 insertions(+), 7 deletions(-) > > It seems okay now, I ran my test suite and everything seems to be fine: > > Reviewed-by: Stefano Garzarella > > Thanks, > Stefano Thanks! Acked-by: Michael S. Tsirkin Not a new regression so I think we should take this in the next cycle, let's be careful here especially since previous version was not even build-tested by the contributor. 
-- MST ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [bug report] virtio_net: support rx/tx queue reset
On Thu, 10 Mar 2022 18:12:21 +0300, Dan Carpenter wrote: > Hello Xuan Zhuo, > > The patch 26ae35c46f93: "virtio_net: support rx/tx queue reset" from > Mar 8, 2022, leads to the following Smatch static checker warning: Yes, thanks to you, I also found this problem today. > > drivers/net/virtio_net.c:1410 virtnet_napi_tx_disable() > warn: sleeping in atomic context > > drivers/net/virtio_net.c > 1829static int virtnet_tx_vq_reset(struct virtnet_info *vi, > 1830 struct send_queue *sq, u32 > ring_num) > 1831{ > 1832struct netdev_queue *txq; > 1833int err, qindex; > 1834 > 1835qindex = sq - vi->sq; > 1836 > 1837txq = netdev_get_tx_queue(vi->dev, qindex); > 1838__netif_tx_lock_bh(txq); > ^^^ > Disables preempt > > 1839 > 1840/* stop tx queue and napi */ > 1841netif_stop_subqueue(vi->dev, qindex); > 1842virtnet_napi_tx_disable(>napi); > ^^ > napi_disable() is a might_sleep() function. > > 1843 > 1844__netif_tx_unlock_bh(txq); > 1845 > 1846/* reset the queue */ > 1847err = virtio_reset_vq(sq->vq); > 1848if (err) { > 1849netif_start_subqueue(vi->dev, qindex); > 1850goto err; > 1851} > > regards, > dan carpenter ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[bug report] virtio_net: support rx/tx queue reset
Hello Xuan Zhuo, The patch 26ae35c46f93: "virtio_net: support rx/tx queue reset" from Mar 8, 2022, leads to the following Smatch static checker warning: drivers/net/virtio_net.c:1410 virtnet_napi_tx_disable() warn: sleeping in atomic context drivers/net/virtio_net.c 1829static int virtnet_tx_vq_reset(struct virtnet_info *vi, 1830 struct send_queue *sq, u32 ring_num) 1831{ 1832struct netdev_queue *txq; 1833int err, qindex; 1834 1835qindex = sq - vi->sq; 1836 1837txq = netdev_get_tx_queue(vi->dev, qindex); 1838__netif_tx_lock_bh(txq); ^^^ Disables preempt 1839 1840/* stop tx queue and napi */ 1841netif_stop_subqueue(vi->dev, qindex); 1842virtnet_napi_tx_disable(>napi); ^^ napi_disable() is a might_sleep() function. 1843 1844__netif_tx_unlock_bh(txq); 1845 1846/* reset the queue */ 1847err = virtio_reset_vq(sq->vq); 1848if (err) { 1849netif_start_subqueue(vi->dev, qindex); 1850goto err; 1851} regards, dan carpenter ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v3] vsock: each transport cycles only on its own sockets
On Thu, Mar 10, 2022 at 10:50:11PM +0900, Jiyong Park wrote: When iterating over sockets using vsock_for_each_connected_socket, make sure that a transport filters out sockets that don't belong to the transport. There actually was an issue caused by this; in a nested VM configuration, destroying the nested VM (which often involves the closing of /dev/vhost-vsock if there was h2g connections to the nested VM) kills not only the h2g connections, but also all existing g2h connections to the (outmost) host which are totally unrelated. Tested: Executed the following steps on Cuttlefish (Android running on a VM) [1]: (1) Enter into an `adb shell` session - to have a g2h connection inside the VM, (2) open and then close /dev/vhost-vsock by `exec 3< /dev/vhost-vsock && exec 3<&-`, (3) observe that the adb session is not reset. [1] https://android.googlesource.com/device/google/cuttlefish/ Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") Signed-off-by: Jiyong Park --- Changes in v3: - Fixed the build error in vmci_transport.c Changes in v2: - Squashed into a single patch drivers/vhost/vsock.c| 3 ++- include/net/af_vsock.h | 3 ++- net/vmw_vsock/af_vsock.c | 9 +++-- net/vmw_vsock/virtio_transport.c | 7 +-- net/vmw_vsock/vmci_transport.c | 5 - 5 files changed, 20 insertions(+), 7 deletions(-) It seems okay now, I ran my test suite and everything seems to be fine: Reviewed-by: Stefano Garzarella Thanks, Stefano ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v7 09/26] virtio_ring: split: implement virtqueue_reset_vring_split()
On Thu, 10 Mar 2022 08:04:27 -0500, "Michael S. Tsirkin" wrote: > On Thu, Mar 10, 2022 at 08:33:30PM +0800, Xuan Zhuo wrote: > > On Thu, 10 Mar 2022 07:17:09 -0500, "Michael S. Tsirkin" > > wrote: > > > On Thu, Mar 10, 2022 at 04:14:16PM +0800, Xuan Zhuo wrote: > > > > On Thu, 10 Mar 2022 03:07:22 -0500, "Michael S. Tsirkin" > > > > wrote: > > > > > On Thu, Mar 10, 2022 at 03:17:03PM +0800, Xuan Zhuo wrote: > > > > > > On Thu, 10 Mar 2022 02:00:39 -0500, "Michael S. Tsirkin" > > > > > > wrote: > > > > > > > On Tue, Mar 08, 2022 at 08:35:01PM +0800, Xuan Zhuo wrote: > > > > > > > > virtio ring supports reset. > > > > > > > > > > > > > > > > Queue reset is divided into several stages. > > > > > > > > > > > > > > > > 1. notify device queue reset > > > > > > > > 2. vring release > > > > > > > > 3. attach new vring > > > > > > > > 4. notify device queue re-enable > > > > > > > > > > > > > > > > After the first step is completed, the vring reset operation > > > > > > > > can be > > > > > > > > performed. If the newly set vring num does not change, then > > > > > > > > just reset > > > > > > > > the vq related value. > > > > > > > > > > > > > > > > Otherwise, the vring will be released and the vring will be > > > > > > > > reallocated. > > > > > > > > And the vring will be attached to the vq. If this process > > > > > > > > fails, the > > > > > > > > function will exit, and the state of the vq will be the vring > > > > > > > > release > > > > > > > > state. You can call this function again to reallocate the vring. > > > > > > > > > > > > > > > > In addition, vring_align, may_reduce_num are necessary for > > > > > > > > reallocating > > > > > > > > vring, so they are retained when creating vq. 
> > > > > > > > > > > > > > > > Signed-off-by: Xuan Zhuo > > > > > > > > --- > > > > > > > > drivers/virtio/virtio_ring.c | 69 > > > > > > > > > > > > > > > > 1 file changed, 69 insertions(+) > > > > > > > > > > > > > > > > diff --git a/drivers/virtio/virtio_ring.c > > > > > > > > b/drivers/virtio/virtio_ring.c > > > > > > > > index e0422c04c903..148fb1fd3d5a 100644 > > > > > > > > --- a/drivers/virtio/virtio_ring.c > > > > > > > > +++ b/drivers/virtio/virtio_ring.c > > > > > > > > @@ -158,6 +158,12 @@ struct vring_virtqueue { > > > > > > > > /* DMA address and size information */ > > > > > > > > dma_addr_t queue_dma_addr; > > > > > > > > size_t queue_size_in_bytes; > > > > > > > > + > > > > > > > > + /* The parameters for creating vrings > > > > > > > > are reserved for > > > > > > > > +* creating new vrings when enabling > > > > > > > > reset queue. > > > > > > > > +*/ > > > > > > > > + u32 vring_align; > > > > > > > > + bool may_reduce_num; > > > > > > > > } split; > > > > > > > > > > > > > > > > /* Available for packed ring */ > > > > > > > > @@ -217,6 +223,12 @@ struct vring_virtqueue { > > > > > > > > #endif > > > > > > > > }; > > > > > > > > > > > > > > > > +static void vring_free(struct virtqueue *vq); > > > > > > > > +static void __vring_virtqueue_init_split(struct > > > > > > > > vring_virtqueue *vq, > > > > > > > > +struct virtio_device > > > > > > > > *vdev); > > > > > > > > +static int __vring_virtqueue_attach_split(struct > > > > > > > > vring_virtqueue *vq, > > > > > > > > + struct virtio_device > > > > > > > > *vdev, > > > > > > > > + struct vring vring); > > > > > > > > > > > > > > > > /* > > > > > > > > * Helpers. 
> > > > > > > > @@ -1012,6 +1024,8 @@ static struct virtqueue > > > > > > > > *vring_create_virtqueue_split( > > > > > > > > return NULL; > > > > > > > > } > > > > > > > > > > > > > > > > + to_vvq(vq)->split.vring_align = vring_align; > > > > > > > > + to_vvq(vq)->split.may_reduce_num = may_reduce_num; > > > > > > > > to_vvq(vq)->split.queue_dma_addr = vring.dma_addr; > > > > > > > > to_vvq(vq)->split.queue_size_in_bytes = > > > > > > > > vring.queue_size_in_bytes; > > > > > > > > to_vvq(vq)->we_own_ring = true; > > > > > > > > @@ -1019,6 +1033,59 @@ static struct virtqueue > > > > > > > > *vring_create_virtqueue_split( > > > > > > > > return vq; > > > > > > > > } > > > > > > > > > > > > > > > > +static int virtqueue_reset_vring_split(struct virtqueue *_vq, > > > > > > > > u32 num) > > > > > > > > +{ > > > > > > > > + struct vring_virtqueue *vq = to_vvq(_vq); > > > > > > > > + struct virtio_device *vdev = _vq->vdev; > > > > > > > > + struct vring_split vring; > > > > > > > > + int err; > > > > > > > > + > > > > > > > > + if (num > _vq->num_max) > > > > > > > > + return -E2BIG; > > > >
Re: [PATCH v2] vsock: each transport cycles only on its own sockets
On Thu, Mar 10, 2022 at 10:28:29PM +0900, Jiyong Park wrote: When iterating over sockets using vsock_for_each_connected_socket, make sure that a transport filters out sockets that don't belong to the transport. There actually was an issue caused by this; in a nested VM configuration, destroying the nested VM (which often involves the closing of /dev/vhost-vsock if there was h2g connections to the nested VM) kills not only the h2g connections, but also all existing g2h connections to the (outmost) host which are totally unrelated. Tested: Executed the following steps on Cuttlefish (Android running on a VM) [1]: (1) Enter into an `adb shell` session - to have a g2h connection inside the VM, (2) open and then close /dev/vhost-vsock by `exec 3< /dev/vhost-vsock && exec 3<&-`, (3) observe that the adb session is not reset. [1] https://android.googlesource.com/device/google/cuttlefish/ Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") Signed-off-by: Jiyong Park --- Changes in v2: - Squashed into a single patch drivers/vhost/vsock.c| 3 ++- include/net/af_vsock.h | 3 ++- net/vmw_vsock/af_vsock.c | 9 +++-- net/vmw_vsock/virtio_transport.c | 7 +-- net/vmw_vsock/vmci_transport.c | 3 ++- 5 files changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 37f0b4274113..e6c9d41db1de 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -753,7 +753,8 @@ static int vhost_vsock_dev_release(struct inode *inode, struct file *file) /* Iterating over all connections for all CIDs to find orphans is * inefficient. Room for improvement here. */ - vsock_for_each_connected_socket(vhost_vsock_reset_orphans); + vsock_for_each_connected_socket(_transport.transport, + vhost_vsock_reset_orphans); /* Don't check the owner, because we are in the release path, so we * need to stop the vsock device in any case. 
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index ab207677e0a8..f742e50207fb 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -205,7 +205,8 @@ struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst); void vsock_remove_sock(struct vsock_sock *vsk); -void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); +void vsock_for_each_connected_socket(struct vsock_transport *transport, +void (*fn)(struct sock *sk)); int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk); bool vsock_find_cid(unsigned int cid); diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 38baeb189d4e..f04abf662ec6 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -334,7 +334,8 @@ void vsock_remove_sock(struct vsock_sock *vsk) } EXPORT_SYMBOL_GPL(vsock_remove_sock); -void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) +void vsock_for_each_connected_socket(struct vsock_transport *transport, +void (*fn)(struct sock *sk)) { int i; @@ -343,8 +344,12 @@ void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) { struct vsock_sock *vsk; list_for_each_entry(vsk, _connected_table[i], - connected_table) + connected_table) { + if (vsk->transport != transport) + continue; + fn(sk_vsock(vsk)); + } } spin_unlock_bh(_table_lock); diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index fb3302fff627..5afc194a58bb 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -24,6 +24,7 @@ static struct workqueue_struct *virtio_vsock_workqueue; static struct virtio_vsock __rcu *the_virtio_vsock; static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */ +static struct virtio_transport virtio_transport; /* forward declaration */ struct virtio_vsock { struct 
virtio_device *vdev; @@ -384,7 +385,8 @@ static void virtio_vsock_event_handle(struct virtio_vsock *vsock, switch (le32_to_cpu(event->id)) { case VIRTIO_VSOCK_EVENT_TRANSPORT_RESET: virtio_vsock_update_guest_cid(vsock); - vsock_for_each_connected_socket(virtio_vsock_reset_sock); + vsock_for_each_connected_socket(_transport.transport, + virtio_vsock_reset_sock); break; } } @@ -662,7 +664,8 @@ static void virtio_vsock_remove(struct virtio_device *vdev) synchronize_rcu(); /* Reset all connected
Re: [PATCH 1/2] vsock: each transport cycles only on its own sockets
On Thu, Mar 10, 2022 at 08:01:53AM -0500, Michael S. Tsirkin wrote: On Thu, Mar 10, 2022 at 09:54:24PM +0900, Jiyong Park wrote: When iterating over sockets using vsock_for_each_connected_socket, make sure that a transport filters out sockets that don't belong to the transport. There actually was an issue caused by this; in a nested VM configuration, destroying the nested VM (which often involves the closing of /dev/vhost-vsock if there was h2g connections to the nested VM) kills not only the h2g connections, but also all existing g2h connections to the (outmost) host which are totally unrelated. Tested: Executed the following steps on Cuttlefish (Android running on a VM) [1]: (1) Enter into an `adb shell` session - to have a g2h connection inside the VM, (2) open and then close /dev/vhost-vsock by `exec 3< /dev/vhost-vsock && exec 3<&-`, (3) observe that the adb session is not reset. [1] https://android.googlesource.com/device/google/cuttlefish/ Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") Signed-off-by: Jiyong Park --- drivers/vhost/vsock.c| 4 net/vmw_vsock/virtio_transport.c | 7 +++ net/vmw_vsock/vmci_transport.c | 5 + 3 files changed, 16 insertions(+) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 37f0b4274113..853ddac00d5b 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -722,6 +722,10 @@ static void vhost_vsock_reset_orphans(struct sock *sk) * executing. */ + /* Only handle our own sockets */ + if (vsk->transport != _transport.transport) + return; + /* If the peer is still valid, no need to reset connection */ if (vhost_vsock_get(vsk->remote_addr.svm_cid)) return; We know this is incomplete though. So I think it's the wrong thing to do when you backport, too. If all you worry about is breaking a binary module interface, how about simply exporting a new function when you backport. 
Thus you will have downstream both: void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); void vsock_for_each_connected_socket_new(struct vsock_transport *transport, void (*fn)(struct sock *sk)); and then upstream we can squash these two patches. Hmm? Yep, reading more of the kernel documentation [1] it seems that upstream we don't worry about this. I agree with Michael, it's better to just have the final patch upstream and downstream will be handled accordingly. This should make it easier upstream to backport into stable branches future patches that depend on this change. Thanks, Stefano [1] https://www.kernel.org/doc/Documentation/process/stable-api-nonsense.rst ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH 1/2] vsock: each transport cycles only on its own sockets
On Thu, Mar 10, 2022 at 10:11:32PM +0900, Jiyong Park wrote: > Hi Michael, > > Thanks for looking into this. > > Would you mind if I ask what you mean by incomplete? Is it because non-updated > modules will still have the issue? Please elaborate. What stefano wrote: I think there is the same problem if the g2h driver will be unloaded (or a reset event is received after a VM migration), it will close all sockets of the nested h2g. looks like this will keep happening even with your patch, though I didn't try. I also don't like how patch 1 adds code that patch 2 removes. Untidy. Let's just squash and have downstreams worry about stable ABI. > > On Thu, Mar 10, 2022 at 10:02 PM Michael S. Tsirkin wrote: > > > > On Thu, Mar 10, 2022 at 09:54:24PM +0900, Jiyong Park wrote: > > > When iterating over sockets using vsock_for_each_connected_socket, make > > > sure that a transport filters out sockets that don't belong to the > > > transport. > > > > > > There actually was an issue caused by this; in a nested VM > > > configuration, destroying the nested VM (which often involves the > > > closing of /dev/vhost-vsock if there was h2g connections to the nested > > > VM) kills not only the h2g connections, but also all existing g2h > > > connections to the (outmost) host which are totally unrelated. > > > > > > Tested: Executed the following steps on Cuttlefish (Android running on a > > > VM) [1]: (1) Enter into an `adb shell` session - to have a g2h > > > connection inside the VM, (2) open and then close /dev/vhost-vsock by > > > `exec 3< /dev/vhost-vsock && exec 3<&-`, (3) observe that the adb > > > session is not reset. 
> > > > > > [1] https://android.googlesource.com/device/google/cuttlefish/ > > > > > > Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") > > > Signed-off-by: Jiyong Park > > > --- > > > drivers/vhost/vsock.c| 4 > > > net/vmw_vsock/virtio_transport.c | 7 +++ > > > net/vmw_vsock/vmci_transport.c | 5 + > > > 3 files changed, 16 insertions(+) > > > > > > diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c > > > index 37f0b4274113..853ddac00d5b 100644 > > > --- a/drivers/vhost/vsock.c > > > +++ b/drivers/vhost/vsock.c > > > @@ -722,6 +722,10 @@ static void vhost_vsock_reset_orphans(struct sock > > > *sk) > > >* executing. > > >*/ > > > > > > + /* Only handle our own sockets */ > > > + if (vsk->transport != _transport.transport) > > > + return; > > > + > > > /* If the peer is still valid, no need to reset connection */ > > > if (vhost_vsock_get(vsk->remote_addr.svm_cid)) > > > return; > > > > > > We know this is incomplete though. So I think it's the wrong thing to do > > when you backport, too. If all you worry about is breaking a binary > > module interface, how about simply exporting a new function when you > > backport. Thus you will have downstream both: > > > > void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); > > > > void vsock_for_each_connected_socket_new(struct vsock_transport *transport, > > void (*fn)(struct sock *sk)); > > > > > > and then upstream we can squash these two patches. > > > > Hmm? 
> > > > > > > diff --git a/net/vmw_vsock/virtio_transport.c > > > b/net/vmw_vsock/virtio_transport.c > > > index fb3302fff627..61b24eb31d4b 100644 > > > --- a/net/vmw_vsock/virtio_transport.c > > > +++ b/net/vmw_vsock/virtio_transport.c > > > @@ -24,6 +24,7 @@ > > > static struct workqueue_struct *virtio_vsock_workqueue; > > > static struct virtio_vsock __rcu *the_virtio_vsock; > > > static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects > > > the_virtio_vsock */ > > > +static struct virtio_transport virtio_transport; /* forward declaration > > > */ > > > > > > struct virtio_vsock { > > > struct virtio_device *vdev; > > > @@ -357,11 +358,17 @@ static void virtio_vsock_event_fill(struct > > > virtio_vsock *vsock) > > > > > > static void virtio_vsock_reset_sock(struct sock *sk) > > > { > > > + struct vsock_sock *vsk = vsock_sk(sk); > > > + > > > /* vmci_transport.c doesn't take sk_lock here either. At least > > > we're > > >* under vsock_table_lock so the sock cannot disappear while we're > > >* executing. > > >*/ > > > > > > + /* Only handle our own sockets */ > > > + if (vsk->transport != _transport.transport) > > > + return; > > > + > > > sk->sk_state = TCP_CLOSE; > > > sk->sk_err = ECONNRESET; > > > sk_error_report(sk); > > > diff --git a/net/vmw_vsock/vmci_transport.c > > > b/net/vmw_vsock/vmci_transport.c > > > index 7aef34e32bdf..cd2f01513fae 100644 > > > --- a/net/vmw_vsock/vmci_transport.c > > > +++ b/net/vmw_vsock/vmci_transport.c > > > @@ -803,6 +803,11 @@ static void vmci_transport_handle_detach(struct sock > > > *sk) > > > struct vsock_sock *vsk; > > > > > > vsk = vsock_sk(sk); > > > + > > > +
Re: [PATCH 2/2] vsock: refactor vsock_for_each_connected_socket
On Thu, Mar 10, 2022 at 09:54:25PM +0900, Jiyong Park wrote: > vsock_for_each_connected_socket now cycles over sockets of a specific > transport only, rather than asking callers to do the filtering manually, > which is error-prone. > > Signed-off-by: Jiyong Park Pls just squash these two patches. Downstream will do its own thing, probably distict from your patch 1 and depending on what its requirements are. > --- > drivers/vhost/vsock.c| 7 ++- > include/net/af_vsock.h | 3 ++- > net/vmw_vsock/af_vsock.c | 9 +++-- > net/vmw_vsock/virtio_transport.c | 12 > net/vmw_vsock/vmci_transport.c | 8 ++-- > 5 files changed, 17 insertions(+), 22 deletions(-) > > diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c > index 853ddac00d5b..e6c9d41db1de 100644 > --- a/drivers/vhost/vsock.c > +++ b/drivers/vhost/vsock.c > @@ -722,10 +722,6 @@ static void vhost_vsock_reset_orphans(struct sock *sk) >* executing. >*/ > > - /* Only handle our own sockets */ > - if (vsk->transport != _transport.transport) > - return; > - > /* If the peer is still valid, no need to reset connection */ > if (vhost_vsock_get(vsk->remote_addr.svm_cid)) > return; > @@ -757,7 +753,8 @@ static int vhost_vsock_dev_release(struct inode *inode, > struct file *file) > > /* Iterating over all connections for all CIDs to find orphans is >* inefficient. Room for improvement here. */ > - vsock_for_each_connected_socket(vhost_vsock_reset_orphans); > + vsock_for_each_connected_socket(_transport.transport, > + vhost_vsock_reset_orphans); > > /* Don't check the owner, because we are in the release path, so we >* need to stop the vsock device in any case. 
> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h > index ab207677e0a8..f742e50207fb 100644 > --- a/include/net/af_vsock.h > +++ b/include/net/af_vsock.h > @@ -205,7 +205,8 @@ struct sock *vsock_find_bound_socket(struct sockaddr_vm > *addr); > struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, >struct sockaddr_vm *dst); > void vsock_remove_sock(struct vsock_sock *vsk); > -void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); > +void vsock_for_each_connected_socket(struct vsock_transport *transport, > + void (*fn)(struct sock *sk)); > int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk); > bool vsock_find_cid(unsigned int cid); > > diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c > index 38baeb189d4e..f04abf662ec6 100644 > --- a/net/vmw_vsock/af_vsock.c > +++ b/net/vmw_vsock/af_vsock.c > @@ -334,7 +334,8 @@ void vsock_remove_sock(struct vsock_sock *vsk) > } > EXPORT_SYMBOL_GPL(vsock_remove_sock); > > -void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) > +void vsock_for_each_connected_socket(struct vsock_transport *transport, > + void (*fn)(struct sock *sk)) > { > int i; > > @@ -343,8 +344,12 @@ void vsock_for_each_connected_socket(void (*fn)(struct > sock *sk)) > for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) { > struct vsock_sock *vsk; > list_for_each_entry(vsk, _connected_table[i], > - connected_table) > + connected_table) { > + if (vsk->transport != transport) > + continue; > + > fn(sk_vsock(vsk)); > + } > } > > spin_unlock_bh(_table_lock); > diff --git a/net/vmw_vsock/virtio_transport.c > b/net/vmw_vsock/virtio_transport.c > index 61b24eb31d4b..5afc194a58bb 100644 > --- a/net/vmw_vsock/virtio_transport.c > +++ b/net/vmw_vsock/virtio_transport.c > @@ -358,17 +358,11 @@ static void virtio_vsock_event_fill(struct virtio_vsock > *vsock) > > static void virtio_vsock_reset_sock(struct sock *sk) > { > - struct vsock_sock *vsk = vsock_sk(sk); > - > /* vmci_transport.c 
doesn't take sk_lock here either. At least we're >* under vsock_table_lock so the sock cannot disappear while we're >* executing. >*/ > > - /* Only handle our own sockets */ > - if (vsk->transport != _transport.transport) > - return; > - > sk->sk_state = TCP_CLOSE; > sk->sk_err = ECONNRESET; > sk_error_report(sk); > @@ -391,7 +385,8 @@ static void virtio_vsock_event_handle(struct virtio_vsock > *vsock, > switch (le32_to_cpu(event->id)) { > case VIRTIO_VSOCK_EVENT_TRANSPORT_RESET: > virtio_vsock_update_guest_cid(vsock); > - vsock_for_each_connected_socket(virtio_vsock_reset_sock); > + vsock_for_each_connected_socket(_transport.transport, > +
Re: [PATCH v7 09/26] virtio_ring: split: implement virtqueue_reset_vring_split()
On Thu, Mar 10, 2022 at 08:33:30PM +0800, Xuan Zhuo wrote: > On Thu, 10 Mar 2022 07:17:09 -0500, "Michael S. Tsirkin" > wrote: > > On Thu, Mar 10, 2022 at 04:14:16PM +0800, Xuan Zhuo wrote: > > > On Thu, 10 Mar 2022 03:07:22 -0500, "Michael S. Tsirkin" > > > wrote: > > > > On Thu, Mar 10, 2022 at 03:17:03PM +0800, Xuan Zhuo wrote: > > > > > On Thu, 10 Mar 2022 02:00:39 -0500, "Michael S. Tsirkin" > > > > > wrote: > > > > > > On Tue, Mar 08, 2022 at 08:35:01PM +0800, Xuan Zhuo wrote: > > > > > > > virtio ring supports reset. > > > > > > > > > > > > > > Queue reset is divided into several stages. > > > > > > > > > > > > > > 1. notify device queue reset > > > > > > > 2. vring release > > > > > > > 3. attach new vring > > > > > > > 4. notify device queue re-enable > > > > > > > > > > > > > > After the first step is completed, the vring reset operation can > > > > > > > be > > > > > > > performed. If the newly set vring num does not change, then just > > > > > > > reset > > > > > > > the vq related value. > > > > > > > > > > > > > > Otherwise, the vring will be released and the vring will be > > > > > > > reallocated. > > > > > > > And the vring will be attached to the vq. If this process fails, > > > > > > > the > > > > > > > function will exit, and the state of the vq will be the vring > > > > > > > release > > > > > > > state. You can call this function again to reallocate the vring. > > > > > > > > > > > > > > In addition, vring_align, may_reduce_num are necessary for > > > > > > > reallocating > > > > > > > vring, so they are retained when creating vq. 
> > > > > > > > > > > > > > Signed-off-by: Xuan Zhuo > > > > > > > --- > > > > > > > drivers/virtio/virtio_ring.c | 69 > > > > > > > > > > > > > > 1 file changed, 69 insertions(+) > > > > > > > > > > > > > > diff --git a/drivers/virtio/virtio_ring.c > > > > > > > b/drivers/virtio/virtio_ring.c > > > > > > > index e0422c04c903..148fb1fd3d5a 100644 > > > > > > > --- a/drivers/virtio/virtio_ring.c > > > > > > > +++ b/drivers/virtio/virtio_ring.c > > > > > > > @@ -158,6 +158,12 @@ struct vring_virtqueue { > > > > > > > /* DMA address and size information */ > > > > > > > dma_addr_t queue_dma_addr; > > > > > > > size_t queue_size_in_bytes; > > > > > > > + > > > > > > > + /* The parameters for creating vrings are > > > > > > > reserved for > > > > > > > + * creating new vrings when enabling reset > > > > > > > queue. > > > > > > > + */ > > > > > > > + u32 vring_align; > > > > > > > + bool may_reduce_num; > > > > > > > } split; > > > > > > > > > > > > > > /* Available for packed ring */ > > > > > > > @@ -217,6 +223,12 @@ struct vring_virtqueue { > > > > > > > #endif > > > > > > > }; > > > > > > > > > > > > > > +static void vring_free(struct virtqueue *vq); > > > > > > > +static void __vring_virtqueue_init_split(struct vring_virtqueue > > > > > > > *vq, > > > > > > > + struct virtio_device *vdev); > > > > > > > +static int __vring_virtqueue_attach_split(struct vring_virtqueue > > > > > > > *vq, > > > > > > > + struct virtio_device *vdev, > > > > > > > + struct vring vring); > > > > > > > > > > > > > > /* > > > > > > > * Helpers. 
> > > > > > > @@ -1012,6 +1024,8 @@ static struct virtqueue > > > > > > > *vring_create_virtqueue_split( > > > > > > > return NULL; > > > > > > > } > > > > > > > > > > > > > > + to_vvq(vq)->split.vring_align = vring_align; > > > > > > > + to_vvq(vq)->split.may_reduce_num = may_reduce_num; > > > > > > > to_vvq(vq)->split.queue_dma_addr = vring.dma_addr; > > > > > > > to_vvq(vq)->split.queue_size_in_bytes = > > > > > > > vring.queue_size_in_bytes; > > > > > > > to_vvq(vq)->we_own_ring = true; > > > > > > > @@ -1019,6 +1033,59 @@ static struct virtqueue > > > > > > > *vring_create_virtqueue_split( > > > > > > > return vq; > > > > > > > } > > > > > > > > > > > > > > +static int virtqueue_reset_vring_split(struct virtqueue *_vq, > > > > > > > u32 num) > > > > > > > +{ > > > > > > > + struct vring_virtqueue *vq = to_vvq(_vq); > > > > > > > + struct virtio_device *vdev = _vq->vdev; > > > > > > > + struct vring_split vring; > > > > > > > + int err; > > > > > > > + > > > > > > > + if (num > _vq->num_max) > > > > > > > + return -E2BIG; > > > > > > > + > > > > > > > + switch (vq->vq.reset) { > > > > > > > + case VIRTIO_VQ_RESET_STEP_NONE: > > > > > > > + return -ENOENT; > > > > > > > + > > > > > > > + case VIRTIO_VQ_RESET_STEP_VRING_ATTACH: > > > > > > > + case VIRTIO_VQ_RESET_STEP_DEVICE: > > > > > > > + if (vq->split.vring.num == num || !num) > > > > > > > + break; > > > > > > > + > > > > > > > + vring_free(_vq); > > > > > > > + > > > > > > > +
Re: [PATCH 1/2] vsock: each transport cycles only on its own sockets
On Thu, Mar 10, 2022 at 09:54:24PM +0900, Jiyong Park wrote: > When iterating over sockets using vsock_for_each_connected_socket, make > sure that a transport filters out sockets that don't belong to the > transport. > > There actually was an issue caused by this; in a nested VM > configuration, destroying the nested VM (which often involves the > closing of /dev/vhost-vsock if there was h2g connections to the nested > VM) kills not only the h2g connections, but also all existing g2h > connections to the (outmost) host which are totally unrelated. > > Tested: Executed the following steps on Cuttlefish (Android running on a > VM) [1]: (1) Enter into an `adb shell` session - to have a g2h > connection inside the VM, (2) open and then close /dev/vhost-vsock by > `exec 3< /dev/vhost-vsock && exec 3<&-`, (3) observe that the adb > session is not reset. > > [1] https://android.googlesource.com/device/google/cuttlefish/ > > Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") > Signed-off-by: Jiyong Park > --- > drivers/vhost/vsock.c| 4 > net/vmw_vsock/virtio_transport.c | 7 +++ > net/vmw_vsock/vmci_transport.c | 5 + > 3 files changed, 16 insertions(+) > > diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c > index 37f0b4274113..853ddac00d5b 100644 > --- a/drivers/vhost/vsock.c > +++ b/drivers/vhost/vsock.c > @@ -722,6 +722,10 @@ static void vhost_vsock_reset_orphans(struct sock *sk) >* executing. >*/ > > + /* Only handle our own sockets */ > + if (vsk->transport != _transport.transport) > + return; > + > /* If the peer is still valid, no need to reset connection */ > if (vhost_vsock_get(vsk->remote_addr.svm_cid)) > return; We know this is incomplete though. So I think it's the wrong thing to do when you backport, too. If all you worry about is breaking a binary module interface, how about simply exporting a new function when you backport. 
Thus you will have downstream both: void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); void vsock_for_each_connected_socket_new(struct vsock_transport *transport, void (*fn)(struct sock *sk)); and then upstream we can squash these two patches. Hmm? > diff --git a/net/vmw_vsock/virtio_transport.c > b/net/vmw_vsock/virtio_transport.c > index fb3302fff627..61b24eb31d4b 100644 > --- a/net/vmw_vsock/virtio_transport.c > +++ b/net/vmw_vsock/virtio_transport.c > @@ -24,6 +24,7 @@ > static struct workqueue_struct *virtio_vsock_workqueue; > static struct virtio_vsock __rcu *the_virtio_vsock; > static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */ > +static struct virtio_transport virtio_transport; /* forward declaration */ > > struct virtio_vsock { > struct virtio_device *vdev; > @@ -357,11 +358,17 @@ static void virtio_vsock_event_fill(struct virtio_vsock > *vsock) > > static void virtio_vsock_reset_sock(struct sock *sk) > { > + struct vsock_sock *vsk = vsock_sk(sk); > + > /* vmci_transport.c doesn't take sk_lock here either. At least we're >* under vsock_table_lock so the sock cannot disappear while we're >* executing. 
>*/ > > + /* Only handle our own sockets */ > + if (vsk->transport != &virtio_transport.transport) > + return; > + > sk->sk_state = TCP_CLOSE; > sk->sk_err = ECONNRESET; > sk_error_report(sk); > diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c > index 7aef34e32bdf..cd2f01513fae 100644 > --- a/net/vmw_vsock/vmci_transport.c > +++ b/net/vmw_vsock/vmci_transport.c > @@ -803,6 +803,11 @@ static void vmci_transport_handle_detach(struct sock *sk) > struct vsock_sock *vsk; > > vsk = vsock_sk(sk); > + > + /* Only handle our own sockets */ > + if (vsk->transport != &vmci_transport) > + return; > + > if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) { > sock_set_flag(sk, SOCK_DONE); > > -- > 2.35.1.723.g4982287a31-goog ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH 0/2] vsock: cycle only on its own socket
On Thu, Mar 10, 2022 at 07:57:58AM -0500, Michael S. Tsirkin wrote: > On Thu, Mar 10, 2022 at 09:54:23PM +0900, Jiyong Park wrote: > > Hi Stefano, > > > > As suggested [1], I've made two patches for easier backporting without > > breaking KMI. > > > > PATCH 1 fixes the very issue of cycling all vsocks regardless of the > > transport and shall be backported. > > > > PATCH 2 is a refactor of PATCH 1 that forces the filtering to all > > (including future) uses of vsock_for_each_connected_socket. > > > > Thanks, > > > > [1] > > https://lore.kernel.org/lkml/20220310110036.fgy323c4hvk3mziq@sgarzare-redhat/ > > > OK that's better. Pls do include changelog in the future. > > Acked-by: Michael S. Tsirkin Hmm actually I think I have a better idea. Hang on. > > > > Jiyong Park (2): > > vsock: each transport cycles only on its own sockets > > vsock: refactor vsock_for_each_connected_socket > > > > drivers/vhost/vsock.c| 3 ++- > > include/net/af_vsock.h | 3 ++- > > net/vmw_vsock/af_vsock.c | 9 +++-- > > net/vmw_vsock/virtio_transport.c | 7 +-- > > net/vmw_vsock/vmci_transport.c | 3 ++- > > 5 files changed, 18 insertions(+), 7 deletions(-) > > > > > > base-commit: 3bf7edc84a9eb4007dd9a0cb8878a7e1d5ec6a3b > > -- > > 2.35.1.723.g4982287a31-goog ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH 0/2] vsock: cycle only on its own socket
On Thu, Mar 10, 2022 at 09:54:23PM +0900, Jiyong Park wrote: > Hi Stefano, > > As suggested [1], I've made two patches for easier backporting without > breaking KMI. > > PATCH 1 fixes the very issue of cycling all vsocks regardless of the > transport and shall be backported. > > PATCH 2 is a refactor of PATCH 1 that forces the filtering to all > (including future) uses of vsock_for_each_connected_socket. > > Thanks, > > [1] > https://lore.kernel.org/lkml/20220310110036.fgy323c4hvk3mziq@sgarzare-redhat/ OK that's better. Pls do include changelog in the future. Acked-by: Michael S. Tsirkin > Jiyong Park (2): > vsock: each transport cycles only on its own sockets > vsock: refactor vsock_for_each_connected_socket > > drivers/vhost/vsock.c| 3 ++- > include/net/af_vsock.h | 3 ++- > net/vmw_vsock/af_vsock.c | 9 +++-- > net/vmw_vsock/virtio_transport.c | 7 +-- > net/vmw_vsock/vmci_transport.c | 3 ++- > 5 files changed, 18 insertions(+), 7 deletions(-) > > > base-commit: 3bf7edc84a9eb4007dd9a0cb8878a7e1d5ec6a3b > -- > 2.35.1.723.g4982287a31-goog ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH 1/2] vsock: each transport cycles only on its own sockets
On Thu, Mar 10, 2022 at 07:53:25AM -0500, Michael S. Tsirkin wrote: > This message had > In-Reply-To: <20220310124936.4179591-1-jiy...@google.com> > in its header but 20220310124936.4179591-2-jiy...@google.com was > not sent to the list. > Please don't do that. Instead, please write and send a proper > cover letter. Thanks! > Also, pls version in subject e.g. PATCH v2, and include full changelog in the cover letter. Thanks! > On Thu, Mar 10, 2022 at 09:49:35PM +0900, Jiyong Park wrote: > > When iterating over sockets using vsock_for_each_connected_socket, make > > sure that a transport filters out sockets that don't belong to the > > transport. > > > > There actually was an issue caused by this; in a nested VM > > configuration, destroying the nested VM (which often involves the > > closing of /dev/vhost-vsock if there was h2g connections to the nested > > VM) kills not only the h2g connections, but also all existing g2h > > connections to the (outmost) host which are totally unrelated. > > > > Tested: Executed the following steps on Cuttlefish (Android running on a > > VM) [1]: (1) Enter into an `adb shell` session - to have a g2h > > connection inside the VM, (2) open and then close /dev/vhost-vsock by > > `exec 3< /dev/vhost-vsock && exec 3<&-`, (3) observe that the adb > > session is not reset. > > > > [1] https://android.googlesource.com/device/google/cuttlefish/ > > > > Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") > > Signed-off-by: Jiyong Park > > --- > > drivers/vhost/vsock.c| 4 > > net/vmw_vsock/virtio_transport.c | 7 +++ > > net/vmw_vsock/vmci_transport.c | 5 + > > 3 files changed, 16 insertions(+) > > > > diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c > > index 37f0b4274113..853ddac00d5b 100644 > > --- a/drivers/vhost/vsock.c > > +++ b/drivers/vhost/vsock.c > > @@ -722,6 +722,10 @@ static void vhost_vsock_reset_orphans(struct sock *sk) > > * executing. 
> > */ > > > > + /* Only handle our own sockets */ > > + if (vsk->transport != _transport.transport) > > + return; > > + > > /* If the peer is still valid, no need to reset connection */ > > if (vhost_vsock_get(vsk->remote_addr.svm_cid)) > > return; > > diff --git a/net/vmw_vsock/virtio_transport.c > > b/net/vmw_vsock/virtio_transport.c > > index fb3302fff627..61b24eb31d4b 100644 > > --- a/net/vmw_vsock/virtio_transport.c > > +++ b/net/vmw_vsock/virtio_transport.c > > @@ -24,6 +24,7 @@ > > static struct workqueue_struct *virtio_vsock_workqueue; > > static struct virtio_vsock __rcu *the_virtio_vsock; > > static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock > > */ > > +static struct virtio_transport virtio_transport; /* forward declaration */ > > > > struct virtio_vsock { > > struct virtio_device *vdev; > > @@ -357,11 +358,17 @@ static void virtio_vsock_event_fill(struct > > virtio_vsock *vsock) > > > > static void virtio_vsock_reset_sock(struct sock *sk) > > { > > + struct vsock_sock *vsk = vsock_sk(sk); > > + > > /* vmci_transport.c doesn't take sk_lock here either. At least we're > > * under vsock_table_lock so the sock cannot disappear while we're > > * executing. 
> > */ > > > > + /* Only handle our own sockets */ > > + if (vsk->transport != _transport.transport) > > + return; > > + > > sk->sk_state = TCP_CLOSE; > > sk->sk_err = ECONNRESET; > > sk_error_report(sk); > > diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c > > index 7aef34e32bdf..cd2f01513fae 100644 > > --- a/net/vmw_vsock/vmci_transport.c > > +++ b/net/vmw_vsock/vmci_transport.c > > @@ -803,6 +803,11 @@ static void vmci_transport_handle_detach(struct sock > > *sk) > > struct vsock_sock *vsk; > > > > vsk = vsock_sk(sk); > > + > > + /* Only handle our own sockets */ > > + if (vsk->transport != _transport) > > + return; > > + > > if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) { > > sock_set_flag(sk, SOCK_DONE); > > > > -- > > 2.35.1.723.g4982287a31-goog ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH 1/2] vsock: each transport cycles only on its own sockets
This message had In-Reply-To: <20220310124936.4179591-1-jiy...@google.com> in its header but 20220310124936.4179591-2-jiy...@google.com was not sent to the list. Please don't do that. Instead, please write and send a proper cover letter. Thanks! On Thu, Mar 10, 2022 at 09:49:35PM +0900, Jiyong Park wrote: > When iterating over sockets using vsock_for_each_connected_socket, make > sure that a transport filters out sockets that don't belong to the > transport. > > There actually was an issue caused by this; in a nested VM > configuration, destroying the nested VM (which often involves the > closing of /dev/vhost-vsock if there was h2g connections to the nested > VM) kills not only the h2g connections, but also all existing g2h > connections to the (outmost) host which are totally unrelated. > > Tested: Executed the following steps on Cuttlefish (Android running on a > VM) [1]: (1) Enter into an `adb shell` session - to have a g2h > connection inside the VM, (2) open and then close /dev/vhost-vsock by > `exec 3< /dev/vhost-vsock && exec 3<&-`, (3) observe that the adb > session is not reset. > > [1] https://android.googlesource.com/device/google/cuttlefish/ > > Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") > Signed-off-by: Jiyong Park > --- > drivers/vhost/vsock.c| 4 > net/vmw_vsock/virtio_transport.c | 7 +++ > net/vmw_vsock/vmci_transport.c | 5 + > 3 files changed, 16 insertions(+) > > diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c > index 37f0b4274113..853ddac00d5b 100644 > --- a/drivers/vhost/vsock.c > +++ b/drivers/vhost/vsock.c > @@ -722,6 +722,10 @@ static void vhost_vsock_reset_orphans(struct sock *sk) >* executing. 
>*/ > > + /* Only handle our own sockets */ > + if (vsk->transport != &vhost_transport.transport) > + return; > + > /* If the peer is still valid, no need to reset connection */ > if (vhost_vsock_get(vsk->remote_addr.svm_cid)) > return; > diff --git a/net/vmw_vsock/virtio_transport.c > b/net/vmw_vsock/virtio_transport.c > index fb3302fff627..61b24eb31d4b 100644 > --- a/net/vmw_vsock/virtio_transport.c > +++ b/net/vmw_vsock/virtio_transport.c > @@ -24,6 +24,7 @@ > static struct workqueue_struct *virtio_vsock_workqueue; > static struct virtio_vsock __rcu *the_virtio_vsock; > static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */ > +static struct virtio_transport virtio_transport; /* forward declaration */ > > struct virtio_vsock { > struct virtio_device *vdev; > @@ -357,11 +358,17 @@ static void virtio_vsock_event_fill(struct virtio_vsock > *vsock) > > static void virtio_vsock_reset_sock(struct sock *sk) > { > + struct vsock_sock *vsk = vsock_sk(sk); > + > /* vmci_transport.c doesn't take sk_lock here either. At least we're > * under vsock_table_lock so the sock cannot disappear while we're > * executing. 
>*/ > > + /* Only handle our own sockets */ > + if (vsk->transport != &virtio_transport.transport) > + return; > + > sk->sk_state = TCP_CLOSE; > sk->sk_err = ECONNRESET; > sk_error_report(sk); > diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c > index 7aef34e32bdf..cd2f01513fae 100644 > --- a/net/vmw_vsock/vmci_transport.c > +++ b/net/vmw_vsock/vmci_transport.c > @@ -803,6 +803,11 @@ static void vmci_transport_handle_detach(struct sock *sk) > struct vsock_sock *vsk; > > vsk = vsock_sk(sk); > + > + /* Only handle our own sockets */ > + if (vsk->transport != &vmci_transport) > + return; > + > if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) { > sock_set_flag(sk, SOCK_DONE); > > -- > 2.35.1.723.g4982287a31-goog ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v7 09/26] virtio_ring: split: implement virtqueue_reset_vring_split()
On Thu, 10 Mar 2022 07:17:09 -0500, "Michael S. Tsirkin" wrote: > On Thu, Mar 10, 2022 at 04:14:16PM +0800, Xuan Zhuo wrote: > > On Thu, 10 Mar 2022 03:07:22 -0500, "Michael S. Tsirkin" > > wrote: > > > On Thu, Mar 10, 2022 at 03:17:03PM +0800, Xuan Zhuo wrote: > > > > On Thu, 10 Mar 2022 02:00:39 -0500, "Michael S. Tsirkin" > > > > wrote: > > > > > On Tue, Mar 08, 2022 at 08:35:01PM +0800, Xuan Zhuo wrote: > > > > > > virtio ring supports reset. > > > > > > > > > > > > Queue reset is divided into several stages. > > > > > > > > > > > > 1. notify device queue reset > > > > > > 2. vring release > > > > > > 3. attach new vring > > > > > > 4. notify device queue re-enable > > > > > > > > > > > > After the first step is completed, the vring reset operation can be > > > > > > performed. If the newly set vring num does not change, then just > > > > > > reset > > > > > > the vq related value. > > > > > > > > > > > > Otherwise, the vring will be released and the vring will be > > > > > > reallocated. > > > > > > And the vring will be attached to the vq. If this process fails, the > > > > > > function will exit, and the state of the vq will be the vring > > > > > > release > > > > > > state. You can call this function again to reallocate the vring. > > > > > > > > > > > > In addition, vring_align, may_reduce_num are necessary for > > > > > > reallocating > > > > > > vring, so they are retained when creating vq. 
> > > > > > > > > > > > Signed-off-by: Xuan Zhuo > > > > > > --- > > > > > > drivers/virtio/virtio_ring.c | 69 > > > > > > > > > > > > 1 file changed, 69 insertions(+) > > > > > > > > > > > > diff --git a/drivers/virtio/virtio_ring.c > > > > > > b/drivers/virtio/virtio_ring.c > > > > > > index e0422c04c903..148fb1fd3d5a 100644 > > > > > > --- a/drivers/virtio/virtio_ring.c > > > > > > +++ b/drivers/virtio/virtio_ring.c > > > > > > @@ -158,6 +158,12 @@ struct vring_virtqueue { > > > > > > /* DMA address and size information */ > > > > > > dma_addr_t queue_dma_addr; > > > > > > size_t queue_size_in_bytes; > > > > > > + > > > > > > + /* The parameters for creating vrings are > > > > > > reserved for > > > > > > +* creating new vrings when enabling reset > > > > > > queue. > > > > > > +*/ > > > > > > + u32 vring_align; > > > > > > + bool may_reduce_num; > > > > > > } split; > > > > > > > > > > > > /* Available for packed ring */ > > > > > > @@ -217,6 +223,12 @@ struct vring_virtqueue { > > > > > > #endif > > > > > > }; > > > > > > > > > > > > +static void vring_free(struct virtqueue *vq); > > > > > > +static void __vring_virtqueue_init_split(struct vring_virtqueue > > > > > > *vq, > > > > > > +struct virtio_device *vdev); > > > > > > +static int __vring_virtqueue_attach_split(struct vring_virtqueue > > > > > > *vq, > > > > > > + struct virtio_device *vdev, > > > > > > + struct vring vring); > > > > > > > > > > > > /* > > > > > > * Helpers. 
> > > > > > @@ -1012,6 +1024,8 @@ static struct virtqueue > > > > > > *vring_create_virtqueue_split( > > > > > > return NULL; > > > > > > } > > > > > > > > > > > > + to_vvq(vq)->split.vring_align = vring_align; > > > > > > + to_vvq(vq)->split.may_reduce_num = may_reduce_num; > > > > > > to_vvq(vq)->split.queue_dma_addr = vring.dma_addr; > > > > > > to_vvq(vq)->split.queue_size_in_bytes = > > > > > > vring.queue_size_in_bytes; > > > > > > to_vvq(vq)->we_own_ring = true; > > > > > > @@ -1019,6 +1033,59 @@ static struct virtqueue > > > > > > *vring_create_virtqueue_split( > > > > > > return vq; > > > > > > } > > > > > > > > > > > > +static int virtqueue_reset_vring_split(struct virtqueue *_vq, u32 > > > > > > num) > > > > > > +{ > > > > > > + struct vring_virtqueue *vq = to_vvq(_vq); > > > > > > + struct virtio_device *vdev = _vq->vdev; > > > > > > + struct vring_split vring; > > > > > > + int err; > > > > > > + > > > > > > + if (num > _vq->num_max) > > > > > > + return -E2BIG; > > > > > > + > > > > > > + switch (vq->vq.reset) { > > > > > > + case VIRTIO_VQ_RESET_STEP_NONE: > > > > > > + return -ENOENT; > > > > > > + > > > > > > + case VIRTIO_VQ_RESET_STEP_VRING_ATTACH: > > > > > > + case VIRTIO_VQ_RESET_STEP_DEVICE: > > > > > > + if (vq->split.vring.num == num || !num) > > > > > > + break; > > > > > > + > > > > > > + vring_free(_vq); > > > > > > + > > > > > > + fallthrough; > > > > > > + > > > > > > + case VIRTIO_VQ_RESET_STEP_VRING_RELEASE: > > > > > > + if (!num) > > > > > > + num = vq->split.vring.num; > > > > > > + > > > > > > + err = vring_create_vring_split(, vdev, >
Re: [PATCH v7 09/26] virtio_ring: split: implement virtqueue_reset_vring_split()
On Thu, Mar 10, 2022 at 04:14:16PM +0800, Xuan Zhuo wrote: > On Thu, 10 Mar 2022 03:07:22 -0500, "Michael S. Tsirkin" > wrote: > > On Thu, Mar 10, 2022 at 03:17:03PM +0800, Xuan Zhuo wrote: > > > On Thu, 10 Mar 2022 02:00:39 -0500, "Michael S. Tsirkin" > > > wrote: > > > > On Tue, Mar 08, 2022 at 08:35:01PM +0800, Xuan Zhuo wrote: > > > > > virtio ring supports reset. > > > > > > > > > > Queue reset is divided into several stages. > > > > > > > > > > 1. notify device queue reset > > > > > 2. vring release > > > > > 3. attach new vring > > > > > 4. notify device queue re-enable > > > > > > > > > > After the first step is completed, the vring reset operation can be > > > > > performed. If the newly set vring num does not change, then just reset > > > > > the vq related value. > > > > > > > > > > Otherwise, the vring will be released and the vring will be > > > > > reallocated. > > > > > And the vring will be attached to the vq. If this process fails, the > > > > > function will exit, and the state of the vq will be the vring release > > > > > state. You can call this function again to reallocate the vring. > > > > > > > > > > In addition, vring_align, may_reduce_num are necessary for > > > > > reallocating > > > > > vring, so they are retained when creating vq. 
> > > > > > > > > > Signed-off-by: Xuan Zhuo > > > > > --- > > > > > drivers/virtio/virtio_ring.c | 69 > > > > > > > > > > 1 file changed, 69 insertions(+) > > > > > > > > > > diff --git a/drivers/virtio/virtio_ring.c > > > > > b/drivers/virtio/virtio_ring.c > > > > > index e0422c04c903..148fb1fd3d5a 100644 > > > > > --- a/drivers/virtio/virtio_ring.c > > > > > +++ b/drivers/virtio/virtio_ring.c > > > > > @@ -158,6 +158,12 @@ struct vring_virtqueue { > > > > > /* DMA address and size information */ > > > > > dma_addr_t queue_dma_addr; > > > > > size_t queue_size_in_bytes; > > > > > + > > > > > + /* The parameters for creating vrings are > > > > > reserved for > > > > > + * creating new vrings when enabling reset > > > > > queue. > > > > > + */ > > > > > + u32 vring_align; > > > > > + bool may_reduce_num; > > > > > } split; > > > > > > > > > > /* Available for packed ring */ > > > > > @@ -217,6 +223,12 @@ struct vring_virtqueue { > > > > > #endif > > > > > }; > > > > > > > > > > +static void vring_free(struct virtqueue *vq); > > > > > +static void __vring_virtqueue_init_split(struct vring_virtqueue *vq, > > > > > + struct virtio_device *vdev); > > > > > +static int __vring_virtqueue_attach_split(struct vring_virtqueue *vq, > > > > > + struct virtio_device *vdev, > > > > > + struct vring vring); > > > > > > > > > > /* > > > > > * Helpers. 
> > > > > @@ -1012,6 +1024,8 @@ static struct virtqueue > > > > > *vring_create_virtqueue_split( > > > > > return NULL; > > > > > } > > > > > > > > > > + to_vvq(vq)->split.vring_align = vring_align; > > > > > + to_vvq(vq)->split.may_reduce_num = may_reduce_num; > > > > > to_vvq(vq)->split.queue_dma_addr = vring.dma_addr; > > > > > to_vvq(vq)->split.queue_size_in_bytes = > > > > > vring.queue_size_in_bytes; > > > > > to_vvq(vq)->we_own_ring = true; > > > > > @@ -1019,6 +1033,59 @@ static struct virtqueue > > > > > *vring_create_virtqueue_split( > > > > > return vq; > > > > > } > > > > > > > > > > +static int virtqueue_reset_vring_split(struct virtqueue *_vq, u32 > > > > > num) > > > > > +{ > > > > > + struct vring_virtqueue *vq = to_vvq(_vq); > > > > > + struct virtio_device *vdev = _vq->vdev; > > > > > + struct vring_split vring; > > > > > + int err; > > > > > + > > > > > + if (num > _vq->num_max) > > > > > + return -E2BIG; > > > > > + > > > > > + switch (vq->vq.reset) { > > > > > + case VIRTIO_VQ_RESET_STEP_NONE: > > > > > + return -ENOENT; > > > > > + > > > > > + case VIRTIO_VQ_RESET_STEP_VRING_ATTACH: > > > > > + case VIRTIO_VQ_RESET_STEP_DEVICE: > > > > > + if (vq->split.vring.num == num || !num) > > > > > + break; > > > > > + > > > > > + vring_free(_vq); > > > > > + > > > > > + fallthrough; > > > > > + > > > > > + case VIRTIO_VQ_RESET_STEP_VRING_RELEASE: > > > > > + if (!num) > > > > > + num = vq->split.vring.num; > > > > > + > > > > > + err = vring_create_vring_split(, vdev, > > > > > +vq->split.vring_align, > > > > > +vq->weak_barriers, > > > > > + > > > > > vq->split.may_reduce_num, num); > > > > > +
Re: [PATCH] vhost/vsock: reset only the h2g connections upon release
On Thu, Mar 10, 2022 at 07:41:54PM +0900, Jiyong Park wrote: Hi Stefano, On Thu, Mar 10, 2022 at 5:59 PM Stefano Garzarella wrote: Hi Jiyong, On Thu, Mar 10, 2022 at 05:18:54PM +0900, Jiyong Park wrote: >Filtering non-h2g connections out when determining orphaned connections. >Otherwise, in a nested VM configuration, destroying the nested VM (which >often involves the closing of /dev/vhost-vsock if there was h2g >connections to the nested VM) kills not only the h2g connections, but >also all existing g2h connections to the (outmost) host which are >totally unrelated. > >Tested: Executed the following steps on Cuttlefish (Android running on a >VM) [1]: (1) Enter into an `adb shell` session - to have a g2h >connection inside the VM, (2) open and then close /dev/vhost-vsock by >`exec 3< /dev/vhost-vsock && exec 3<&-`, (3) observe that the adb >session is not reset. > >[1] https://android.googlesource.com/device/google/cuttlefish/ > >Signed-off-by: Jiyong Park >--- > drivers/vhost/vsock.c | 4 > 1 file changed, 4 insertions(+) > >diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c >index 37f0b4274113..2f6d5d66f5ed 100644 >--- a/drivers/vhost/vsock.c >+++ b/drivers/vhost/vsock.c >@@ -722,6 +722,10 @@ static void vhost_vsock_reset_orphans(struct sock *sk) >* executing. >*/ > >+ /* Only the h2g connections are reset */ >+ if (vsk->transport != _transport.transport) >+ return; >+ > /* If the peer is still valid, no need to reset connection */ > if (vhost_vsock_get(vsk->remote_addr.svm_cid)) > return; >-- >2.35.1.723.g4982287a31-goog > Thanks for your patch! Yes, I see the problem and I think I introduced it with the multi-transports support (ooops). So we should add this fixes tag: Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") IIUC the problem is for all transports that should only cycle on their own sockets. 
Indeed I think there is the same problem if the g2h driver will be unloaded (or a reset event is received after a VM migration), it will close all sockets of the nested h2g. So I suggest a more generic solution, modifying vsock_for_each_connected_socket() like this (not tested): diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 38baeb189d4e..f04abf662ec6 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -334,7 +334,8 @@ void vsock_remove_sock(struct vsock_sock *vsk) } EXPORT_SYMBOL_GPL(vsock_remove_sock); -void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) +void vsock_for_each_connected_socket(struct vsock_transport *transport, +void (*fn)(struct sock *sk)) { int i; @@ -343,8 +344,12 @@ void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) { struct vsock_sock *vsk; list_for_each_entry(vsk, _connected_table[i], - connected_table) + connected_table) { + if (vsk->transport != transport) + continue; + fn(sk_vsock(vsk)); + } } And all transports that call it. Thanks, Stefano Thanks for the suggestion, which looks much better. It actually worked well. Thanks for trying this! By the way, the suggested change will alter the kernel-module interface (KMI), which will make it difficult to land the change on older releases where we'd like to keep the KMI stable [1]. Would it be OK if we let the supplied function (fn) be responsible for checking the transport? I think that there, in the future, might be a case where one needs to cycle over all sockets for inspection or so. I admit that this would be prone to error, though. Please let me know what you think. I don't have a strong preference. I will submit a revision as you want. I see your point, and it makes sense to keep KMI on stable branches, but mainline I would like to have the proposed approach since all transports use it to cycle on their sockets. 
So we could do a series with 2 patches: - Patch 1 fixes the problem in all transports by checking the transport in the callback (here we insert the fixes tag so we backport it to the stable branches) - Patch 2 moves the check in vsock_for_each_connected_socket() removing it from callbacks so it is less prone to errors and we merge it only mainline What do you think? Thanks, Stefano ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH] VMCI: Fix a couple of failure paths in vmci_guest_probe_device()
On Wed, Mar 09, 2022 at 10:24:56AM -0800, Vishnu Dasa wrote: > notification_bitmap may not be released when VMCI_CAPS_DMA_DATAGRAM > capability is missing from the device. Add missing > 'err_free_notification_bitmap' label and use it instead of > 'err_free_data_buffers' to avoid this. > > free_irq() may be called to free an interrupt that was not > allocated. Add missing 'if' statement to check for > exclusive_vectors when freeing interrupt 1. > > Reported-by: Dan Carpenter > Reviewed-by: Bryan Tan > Reviewed-by: Rajesh Jalisatgi > Signed-off-by: Vishnu Dasa > --- > The patches which introduced these bugs are not in any released > kernels nor RC yet, so this fix does not need to be backported. So this has to get into 5.17-final? If not, it should be backported to 5.17, right? You should always include the "Fixes:" tag in the commit message so that we can figure this out. And shouldn't this be 2 different patches? Please break up. thanks, greg k-h ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH] vhost/vsock: reset only the h2g connections upon release
Hi Jiyong, On Thu, Mar 10, 2022 at 05:18:54PM +0900, Jiyong Park wrote: Filtering non-h2g connections out when determining orphaned connections. Otherwise, in a nested VM configuration, destroying the nested VM (which often involves the closing of /dev/vhost-vsock if there was h2g connections to the nested VM) kills not only the h2g connections, but also all existing g2h connections to the (outmost) host which are totally unrelated. Tested: Executed the following steps on Cuttlefish (Android running on a VM) [1]: (1) Enter into an `adb shell` session - to have a g2h connection inside the VM, (2) open and then close /dev/vhost-vsock by `exec 3< /dev/vhost-vsock && exec 3<&-`, (3) observe that the adb session is not reset. [1] https://android.googlesource.com/device/google/cuttlefish/ Signed-off-by: Jiyong Park --- drivers/vhost/vsock.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 37f0b4274113..2f6d5d66f5ed 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -722,6 +722,10 @@ static void vhost_vsock_reset_orphans(struct sock *sk) * executing. */ + /* Only the h2g connections are reset */ + if (vsk->transport != _transport.transport) + return; + /* If the peer is still valid, no need to reset connection */ if (vhost_vsock_get(vsk->remote_addr.svm_cid)) return; -- 2.35.1.723.g4982287a31-goog Thanks for your patch! Yes, I see the problem and I think I introduced it with the multi-transports support (ooops). So we should add this fixes tag: Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") IIUC the problem is for all transports that should only cycle on their own sockets. Indeed I think there is the same problem if the g2h driver will be unloaded (or a reset event is received after a VM migration), it will close all sockets of the nested h2g. 
So I suggest a more generic solution, modifying vsock_for_each_connected_socket() like this (not tested): diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 38baeb189d4e..f04abf662ec6 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -334,7 +334,8 @@ void vsock_remove_sock(struct vsock_sock *vsk) } EXPORT_SYMBOL_GPL(vsock_remove_sock); -void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) +void vsock_for_each_connected_socket(struct vsock_transport *transport, +void (*fn)(struct sock *sk)) { int i; @@ -343,8 +344,12 @@ void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) { struct vsock_sock *vsk; list_for_each_entry(vsk, &vsock_connected_table[i], - connected_table) + connected_table) { + if (vsk->transport != transport) + continue; + fn(sk_vsock(vsk)); + } } And all transports that call it. Thanks, Stefano ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v7 17/26] virtio_pci: queue_reset: support VIRTIO_F_RING_RESET
On Wed, 9 Mar 2022 16:54:10 +0800, Jason Wang wrote: > > 在 2022/3/8 下午8:35, Xuan Zhuo 写道: > > This patch implements virtio pci support for QUEUE RESET. > > > > Performing reset on a queue is divided into these steps: > > > > 1. virtio_reset_vq() - notify the device to reset the queue > > 2. virtqueue_detach_unused_buf() - recycle the buffer submitted > > 3. virtqueue_reset_vring()- reset the vring (may re-alloc) > > 4. virtio_enable_resetq() - mmap vring to device, and enable the > > queue > > > > This patch implements virtio_reset_vq(), virtio_enable_resetq() in the > > pci scenario. > > > > Signed-off-by: Xuan Zhuo > > --- > > drivers/virtio/virtio_pci_common.c | 8 +-- > > drivers/virtio/virtio_pci_modern.c | 83 ++ > > 2 files changed, 88 insertions(+), 3 deletions(-) > > > > diff --git a/drivers/virtio/virtio_pci_common.c > > b/drivers/virtio/virtio_pci_common.c > > index fdbde1db5ec5..863d3a8a0956 100644 > > --- a/drivers/virtio/virtio_pci_common.c > > +++ b/drivers/virtio/virtio_pci_common.c > > @@ -248,9 +248,11 @@ static void vp_del_vq(struct virtqueue *vq) > > struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index]; > > unsigned long flags; > > > > - spin_lock_irqsave(_dev->lock, flags); > > - list_del(>node); > > - spin_unlock_irqrestore(_dev->lock, flags); > > + if (!vq->reset) { > > + spin_lock_irqsave(_dev->lock, flags); > > + list_del(>node); > > + spin_unlock_irqrestore(_dev->lock, flags); > > + } > > > > vp_dev->del_vq(info); > > kfree(info); > > diff --git a/drivers/virtio/virtio_pci_modern.c > > b/drivers/virtio/virtio_pci_modern.c > > index 49a4493732cf..3c67d3607802 100644 > > --- a/drivers/virtio/virtio_pci_modern.c > > +++ b/drivers/virtio/virtio_pci_modern.c > > @@ -34,6 +34,9 @@ static void vp_transport_features(struct virtio_device > > *vdev, u64 features) > > if ((features & BIT_ULL(VIRTIO_F_SR_IOV)) && > > pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV)) > > __virtio_set_bit(vdev, VIRTIO_F_SR_IOV); > > + > > + if (features & 
BIT_ULL(VIRTIO_F_RING_RESET)) > > + __virtio_set_bit(vdev, VIRTIO_F_RING_RESET); > > } > > > > /* virtio config->finalize_features() implementation */ > > @@ -199,6 +202,82 @@ static int vp_active_vq(struct virtqueue *vq, u16 > > msix_vec) > > return 0; > > } > > > > +static int vp_modern_reset_vq(struct virtqueue *vq) > > +{ > > + struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); > > + struct virtio_pci_modern_device *mdev = _dev->mdev; > > + struct virtio_pci_vq_info *info; > > + unsigned long flags; > > + unsigned int irq; > > + > > + if (!virtio_has_feature(vq->vdev, VIRTIO_F_RING_RESET)) > > + return -ENOENT; > > + > > + vp_modern_set_queue_reset(mdev, vq->index); > > + > > + info = vp_dev->vqs[vq->index]; > > + > > + /* delete vq from irq handler */ > > + spin_lock_irqsave(_dev->lock, flags); > > + list_del(>node); > > + spin_unlock_irqrestore(_dev->lock, flags); > > + > > + INIT_LIST_HEAD(>node); > > + > > + vq->reset = VIRTIO_VQ_RESET_STEP_DEVICE; > > + > > + /* sync irq callback. */ > > + if (vp_dev->intx_enabled) { > > + irq = vp_dev->pci_dev->irq; > > + > > + } else { > > + if (info->msix_vector == VIRTIO_MSI_NO_VECTOR) > > + return 0; > > + > > + irq = pci_irq_vector(vp_dev->pci_dev, info->msix_vector); > > + } > > + > > + synchronize_irq(irq); > > > Synchronize_irq() is not sufficient here since it breaks the effort of > the interrupt hardening which is done by commits: > > 080cd7c3ac87 virtio-pci: harden INTX interrupts > 9e35276a5344 virtio_pci: harden MSI-X interrupts > > Unfortunately 080cd7c3ac87 introduces an issue that disable_irq() were > used for the affinity managed irq but we're discussing a fix. > ok, I think disable_irq() is still used here. I want to determine the solution for this detail first. So I posted the code, I hope Jason can help confirm this point first. There are three situations in which vq corresponds to an interrupt 1. intx 2. msix: per vq vectors 2. 
msix: share irq Essentially can be divided into two categories: per vq vectors and share irq. For share irq is based on virtqueues to find vq, so I think it is safe as long as list_del() is executed under the protection of the lock. In the case of per vq vectors, disable_irq() is used. Thanks. +static int vp_modern_reset_vq(struct virtqueue *vq) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); + struct virtio_pci_modern_device *mdev = &vp_dev->mdev; + struct virtio_pci_vq_info *info; + unsigned long flags; + unsigned int irq; + + if (!virtio_has_feature(vq->vdev, VIRTIO_F_RING_RESET)) + return -ENOENT; + + vp_modern_set_queue_reset(mdev, vq->index); + + info = vp_dev->vqs[vq->index]; + +
Re: [PATCH v7 09/26] virtio_ring: split: implement virtqueue_reset_vring_split()
On Thu, 10 Mar 2022 03:07:22 -0500, "Michael S. Tsirkin" wrote: > On Thu, Mar 10, 2022 at 03:17:03PM +0800, Xuan Zhuo wrote: > > On Thu, 10 Mar 2022 02:00:39 -0500, "Michael S. Tsirkin" > > wrote: > > > On Tue, Mar 08, 2022 at 08:35:01PM +0800, Xuan Zhuo wrote: > > > > virtio ring supports reset. > > > > > > > > Queue reset is divided into several stages. > > > > > > > > 1. notify device queue reset > > > > 2. vring release > > > > 3. attach new vring > > > > 4. notify device queue re-enable > > > > > > > > After the first step is completed, the vring reset operation can be > > > > performed. If the newly set vring num does not change, then just reset > > > > the vq related value. > > > > > > > > Otherwise, the vring will be released and the vring will be reallocated. > > > > And the vring will be attached to the vq. If this process fails, the > > > > function will exit, and the state of the vq will be the vring release > > > > state. You can call this function again to reallocate the vring. > > > > > > > > In addition, vring_align, may_reduce_num are necessary for reallocating > > > > vring, so they are retained when creating vq. > > > > > > > > Signed-off-by: Xuan Zhuo > > > > --- > > > > drivers/virtio/virtio_ring.c | 69 > > > > 1 file changed, 69 insertions(+) > > > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c > > > > index e0422c04c903..148fb1fd3d5a 100644 > > > > --- a/drivers/virtio/virtio_ring.c > > > > +++ b/drivers/virtio/virtio_ring.c > > > > @@ -158,6 +158,12 @@ struct vring_virtqueue { > > > > /* DMA address and size information */ > > > > dma_addr_t queue_dma_addr; > > > > size_t queue_size_in_bytes; > > > > + > > > > + /* The parameters for creating vrings are > > > > reserved for > > > > +* creating new vrings when enabling reset > > > > queue. 
> > > > +*/ > > > > + u32 vring_align; > > > > + bool may_reduce_num; > > > > } split; > > > > > > > > /* Available for packed ring */ > > > > @@ -217,6 +223,12 @@ struct vring_virtqueue { > > > > #endif > > > > }; > > > > > > > > +static void vring_free(struct virtqueue *vq); > > > > +static void __vring_virtqueue_init_split(struct vring_virtqueue *vq, > > > > +struct virtio_device *vdev); > > > > +static int __vring_virtqueue_attach_split(struct vring_virtqueue *vq, > > > > + struct virtio_device *vdev, > > > > + struct vring vring); > > > > > > > > /* > > > > * Helpers. > > > > @@ -1012,6 +1024,8 @@ static struct virtqueue > > > > *vring_create_virtqueue_split( > > > > return NULL; > > > > } > > > > > > > > + to_vvq(vq)->split.vring_align = vring_align; > > > > + to_vvq(vq)->split.may_reduce_num = may_reduce_num; > > > > to_vvq(vq)->split.queue_dma_addr = vring.dma_addr; > > > > to_vvq(vq)->split.queue_size_in_bytes = > > > > vring.queue_size_in_bytes; > > > > to_vvq(vq)->we_own_ring = true; > > > > @@ -1019,6 +1033,59 @@ static struct virtqueue > > > > *vring_create_virtqueue_split( > > > > return vq; > > > > } > > > > > > > > +static int virtqueue_reset_vring_split(struct virtqueue *_vq, u32 num) > > > > +{ > > > > + struct vring_virtqueue *vq = to_vvq(_vq); > > > > + struct virtio_device *vdev = _vq->vdev; > > > > + struct vring_split vring; > > > > + int err; > > > > + > > > > + if (num > _vq->num_max) > > > > + return -E2BIG; > > > > + > > > > + switch (vq->vq.reset) { > > > > + case VIRTIO_VQ_RESET_STEP_NONE: > > > > + return -ENOENT; > > > > + > > > > + case VIRTIO_VQ_RESET_STEP_VRING_ATTACH: > > > > + case VIRTIO_VQ_RESET_STEP_DEVICE: > > > > + if (vq->split.vring.num == num || !num) > > > > + break; > > > > + > > > > + vring_free(_vq); > > > > + > > > > + fallthrough; > > > > + > > > > + case VIRTIO_VQ_RESET_STEP_VRING_RELEASE: > > > > + if (!num) > > > > + num = vq->split.vring.num; > > > > + > > > > + err = vring_create_vring_split(, vdev, > > > > + 
vq->split.vring_align, > > > > + vq->weak_barriers, > > > > + > > > > vq->split.may_reduce_num, num); > > > > + if (err) > > > > + return -ENOMEM; > > > > + > > > > + err = __vring_virtqueue_attach_split(vq, vdev, > > > > vring.vring); > > > > + if (err) { > > > > +
Re: [PATCH v7 09/26] virtio_ring: split: implement virtqueue_reset_vring_split()
On Thu, Mar 10, 2022 at 03:17:03PM +0800, Xuan Zhuo wrote: > On Thu, 10 Mar 2022 02:00:39 -0500, "Michael S. Tsirkin" > wrote: > > On Tue, Mar 08, 2022 at 08:35:01PM +0800, Xuan Zhuo wrote: > > > virtio ring supports reset. > > > > > > Queue reset is divided into several stages. > > > > > > 1. notify device queue reset > > > 2. vring release > > > 3. attach new vring > > > 4. notify device queue re-enable > > > > > > After the first step is completed, the vring reset operation can be > > > performed. If the newly set vring num does not change, then just reset > > > the vq related value. > > > > > > Otherwise, the vring will be released and the vring will be reallocated. > > > And the vring will be attached to the vq. If this process fails, the > > > function will exit, and the state of the vq will be the vring release > > > state. You can call this function again to reallocate the vring. > > > > > > In addition, vring_align, may_reduce_num are necessary for reallocating > > > vring, so they are retained when creating vq. > > > > > > Signed-off-by: Xuan Zhuo > > > --- > > > drivers/virtio/virtio_ring.c | 69 > > > 1 file changed, 69 insertions(+) > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c > > > index e0422c04c903..148fb1fd3d5a 100644 > > > --- a/drivers/virtio/virtio_ring.c > > > +++ b/drivers/virtio/virtio_ring.c > > > @@ -158,6 +158,12 @@ struct vring_virtqueue { > > > /* DMA address and size information */ > > > dma_addr_t queue_dma_addr; > > > size_t queue_size_in_bytes; > > > + > > > + /* The parameters for creating vrings are reserved for > > > + * creating new vrings when enabling reset queue. 
> > > + */ > > > + u32 vring_align; > > > + bool may_reduce_num; > > > } split; > > > > > > /* Available for packed ring */ > > > @@ -217,6 +223,12 @@ struct vring_virtqueue { > > > #endif > > > }; > > > > > > +static void vring_free(struct virtqueue *vq); > > > +static void __vring_virtqueue_init_split(struct vring_virtqueue *vq, > > > + struct virtio_device *vdev); > > > +static int __vring_virtqueue_attach_split(struct vring_virtqueue *vq, > > > + struct virtio_device *vdev, > > > + struct vring vring); > > > > > > /* > > > * Helpers. > > > @@ -1012,6 +1024,8 @@ static struct virtqueue > > > *vring_create_virtqueue_split( > > > return NULL; > > > } > > > > > > + to_vvq(vq)->split.vring_align = vring_align; > > > + to_vvq(vq)->split.may_reduce_num = may_reduce_num; > > > to_vvq(vq)->split.queue_dma_addr = vring.dma_addr; > > > to_vvq(vq)->split.queue_size_in_bytes = vring.queue_size_in_bytes; > > > to_vvq(vq)->we_own_ring = true; > > > @@ -1019,6 +1033,59 @@ static struct virtqueue > > > *vring_create_virtqueue_split( > > > return vq; > > > } > > > > > > +static int virtqueue_reset_vring_split(struct virtqueue *_vq, u32 num) > > > +{ > > > + struct vring_virtqueue *vq = to_vvq(_vq); > > > + struct virtio_device *vdev = _vq->vdev; > > > + struct vring_split vring; > > > + int err; > > > + > > > + if (num > _vq->num_max) > > > + return -E2BIG; > > > + > > > + switch (vq->vq.reset) { > > > + case VIRTIO_VQ_RESET_STEP_NONE: > > > + return -ENOENT; > > > + > > > + case VIRTIO_VQ_RESET_STEP_VRING_ATTACH: > > > + case VIRTIO_VQ_RESET_STEP_DEVICE: > > > + if (vq->split.vring.num == num || !num) > > > + break; > > > + > > > + vring_free(_vq); > > > + > > > + fallthrough; > > > + > > > + case VIRTIO_VQ_RESET_STEP_VRING_RELEASE: > > > + if (!num) > > > + num = vq->split.vring.num; > > > + > > > + err = vring_create_vring_split(, vdev, > > > +vq->split.vring_align, > > > +vq->weak_barriers, > > > +vq->split.may_reduce_num, num); > > > + if (err) > > > + return -ENOMEM; > > > 
+ > > > + err = __vring_virtqueue_attach_split(vq, vdev, vring.vring); > > > + if (err) { > > > + vring_free_queue(vdev, vring.queue_size_in_bytes, > > > + vring.queue, > > > + vring.dma_addr); > > > + return -ENOMEM; > > > + } > > > + > > > + vq->split.queue_dma_addr = vring.dma_addr; > > > + vq->split.queue_size_in_bytes = vring.queue_size_in_bytes; > > > + } > > > + > > > + __vring_virtqueue_init_split(vq, vdev); > > > + vq->we_own_ring = true; > > > + vq->vq.reset = VIRTIO_VQ_RESET_STEP_VRING_ATTACH; > > > + > > > + return 0; > > > +} > > > + > > > > I kind of dislike this state machine. > > > > Hacks like special-casing