Re: [PATCH net-next v2 00/12] vsock/virtio: continue MSG_ZEROCOPY support

2023-10-03 Thread Stefano Garzarella

Hi Arseniy,

On Sun, Oct 01, 2023 at 12:02:56AM +0300, Arseniy Krasnov wrote:

Hello,

this patchset contains second and third parts of another big patchset
for MSG_ZEROCOPY flag support:
https://lore.kernel.org/netdev/20230701063947.3422088-1-avkras...@sberdevices.ru/

During review of this series, Stefano Garzarella 
suggested to split it for three parts to simplify review and merging:

1) virtio and vhost updates (for fragged skbs) (merged to net-next, see
  link below)
2) AF_VSOCK updates (allows to enable MSG_ZEROCOPY mode and read
  tx completions) and update for Documentation/. <-- this patchset
3) Updates for tests and utils. <-- this patchset

Part 1) was merged:
https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/commit/?id=71b263e79370348349553ecdf46f4a69eb436dc7

Head for this patchset is:
https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/commit/?id=236f3873b517acfaf949c23bb2d5dec13bfd2da2

Link to v1:
https://lore.kernel.org/netdev/20230922052428.4005676-1-avkras...@salutedevices.com/

Changelog:
v1 -> v2:
* Patchset rebased and tested on new HEAD of net-next (see hash above).
* See per-patch changelog after ---.


Thanks for this new version.
I started to include vsock_uring_test in my test suite and tests are
going well.

I reviewed code patches, I still need to review the tests.
I'll do that by the end of the week, but they looks good!

Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH net-next v2 10/12] test/vsock: MSG_ZEROCOPY flag tests

2023-10-03 Thread Stefano Garzarella

On Sun, Oct 01, 2023 at 12:03:06AM +0300, Arseniy Krasnov wrote:

This adds three tests for MSG_ZEROCOPY feature:
1) SOCK_STREAM tx with different buffers.
2) SOCK_SEQPACKET tx with different buffers.
3) SOCK_STREAM test to read empty error queue of the socket.

Signed-off-by: Arseniy Krasnov 
---
Changelog:
v1 -> v2:
 * Move 'SOL_VSOCK' and 'VSOCK_RECVERR' from 'util.c' to 'util.h'.

tools/testing/vsock/Makefile  |   2 +-
tools/testing/vsock/util.c| 214 +++
tools/testing/vsock/util.h|  27 ++
tools/testing/vsock/vsock_test.c  |  16 ++
tools/testing/vsock/vsock_test_zerocopy.c | 314 ++
tools/testing/vsock/vsock_test_zerocopy.h |  15 ++
6 files changed, 587 insertions(+), 1 deletion(-)
create mode 100644 tools/testing/vsock/vsock_test_zerocopy.c
create mode 100644 tools/testing/vsock/vsock_test_zerocopy.h

diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile
index 21a98ba565ab..1a26f60a596c 100644
--- a/tools/testing/vsock/Makefile
+++ b/tools/testing/vsock/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
all: test vsock_perf
test: vsock_test vsock_diag_test
-vsock_test: vsock_test.o timeout.o control.o util.o
+vsock_test: vsock_test.o vsock_test_zerocopy.o timeout.o control.o util.o
vsock_diag_test: vsock_diag_test.o timeout.o control.o util.o
vsock_perf: vsock_perf.o

diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
index 6779d5008b27..2a641ab38f08 100644
--- a/tools/testing/vsock/util.c
+++ b/tools/testing/vsock/util.c
@@ -11,10 +11,14 @@
#include 
#include 
#include 
+#include 
#include 
#include 
#include 
#include 
+#include 
+#include 
+#include 

#include "timeout.h"
#include "control.h"
@@ -444,3 +448,213 @@ unsigned long hash_djb2(const void *data, size_t len)

return hash;
}
+
+void enable_so_zerocopy(int fd)
+{
+   int val = 1;
+
+   if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, , sizeof(val))) {
+   perror("setsockopt");
+   exit(EXIT_FAILURE);
+   }
+}
+
+static void *mmap_no_fail(size_t bytes)
+{
+   void *res;
+
+   res = mmap(NULL, bytes, PROT_READ | PROT_WRITE,
+  MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0);
+   if (res == MAP_FAILED) {
+   perror("mmap");
+   exit(EXIT_FAILURE);
+   }
+
+   return res;
+}
+
+size_t iovec_bytes(const struct iovec *iov, size_t iovnum)
+{
+   size_t bytes;
+   int i;
+
+   for (bytes = 0, i = 0; i < iovnum; i++)
+   bytes += iov[i].iov_len;
+
+   return bytes;
+}
+
+static void iovec_random_init(struct iovec *iov,
+ const struct vsock_test_data *test_data)
+{
+   int i;
+
+   for (i = 0; i < test_data->vecs_cnt; i++) {
+   int j;
+
+   if (test_data->vecs[i].iov_base == MAP_FAILED)
+   continue;
+
+   for (j = 0; j < iov[i].iov_len; j++)
+   ((uint8_t *)iov[i].iov_base)[j] = rand() & 0xff;
+   }
+}
+
+unsigned long iovec_hash_djb2(struct iovec *iov, size_t iovnum)
+{
+   unsigned long hash;
+   size_t iov_bytes;
+   size_t offs;
+   void *tmp;
+   int i;
+
+   iov_bytes = iovec_bytes(iov, iovnum);
+
+   tmp = malloc(iov_bytes);
+   if (!tmp) {
+   perror("malloc");
+   exit(EXIT_FAILURE);
+   }
+
+   for (offs = 0, i = 0; i < iovnum; i++) {
+   memcpy(tmp + offs, iov[i].iov_base, iov[i].iov_len);
+   offs += iov[i].iov_len;
+   }
+
+   hash = hash_djb2(tmp, iov_bytes);
+   free(tmp);
+
+   return hash;
+}
+
+struct iovec *iovec_from_test_data(const struct vsock_test_data *test_data)
+{
+   const struct iovec *test_iovec;
+   struct iovec *iovec;
+   int i;
+
+   iovec = malloc(sizeof(*iovec) * test_data->vecs_cnt);
+   if (!iovec) {
+   perror("malloc");
+   exit(EXIT_FAILURE);
+   }
+
+   test_iovec = test_data->vecs;
+
+   for (i = 0; i < test_data->vecs_cnt; i++) {
+   iovec[i].iov_len = test_iovec[i].iov_len;
+   iovec[i].iov_base = mmap_no_fail(test_iovec[i].iov_len);
+
+   if (test_iovec[i].iov_base != MAP_FAILED &&
+   test_iovec[i].iov_base)
+   iovec[i].iov_base += (uintptr_t)test_iovec[i].iov_base;
+   }
+
+   /* Unmap "invalid" elements. */
+   for (i = 0; i < test_data->vecs_cnt; i++) {
+   if (test_iovec[i].iov_base == MAP_FAILED) {
+   if (munmap(iovec[i].iov_base, iovec[i].iov_len)) {
+   perror("munmap");
+   exit(EXIT_FAILURE);
+   }
+   }
+   }
+
+   iovec_random_init(iovec, test_data);
+
+   return iovec;
+}
+
+void free_iovec_test_data(const struct vsock_test_data *test_data,
+   

Re: [PATCH net-next v2 09/12] docs: net: description of MSG_ZEROCOPY for AF_VSOCK

2023-10-03 Thread Stefano Garzarella

On Sun, Oct 01, 2023 at 12:03:05AM +0300, Arseniy Krasnov wrote:

This adds description of MSG_ZEROCOPY flag support for AF_VSOCK type of
socket.

Signed-off-by: Arseniy Krasnov 
---
Documentation/networking/msg_zerocopy.rst | 13 +++--
1 file changed, 11 insertions(+), 2 deletions(-)


Reviewed-by: Stefano Garzarella 



diff --git a/Documentation/networking/msg_zerocopy.rst 
b/Documentation/networking/msg_zerocopy.rst
index b3ea96af9b49..78fb70e748b7 100644
--- a/Documentation/networking/msg_zerocopy.rst
+++ b/Documentation/networking/msg_zerocopy.rst
@@ -7,7 +7,8 @@ Intro
=

The MSG_ZEROCOPY flag enables copy avoidance for socket send calls.
-The feature is currently implemented for TCP and UDP sockets.
+The feature is currently implemented for TCP, UDP and VSOCK (with
+virtio transport) sockets.


Opportunity and Caveats
@@ -174,7 +175,9 @@ read_notification() call in the previous snippet. A 
notification
is encoded in the standard error format, sock_extended_err.

The level and type fields in the control data are protocol family
-specific, IP_RECVERR or IPV6_RECVERR.
+specific, IP_RECVERR or IPV6_RECVERR (for TCP or UDP socket).
+For VSOCK socket, cmsg_level will be SOL_VSOCK and cmsg_type will be
+VSOCK_RECVERR.

Error origin is the new type SO_EE_ORIGIN_ZEROCOPY. ee_errno is zero,
as explained before, to avoid blocking read and write system calls on
@@ -235,12 +238,15 @@ Implementation
Loopback


+For TCP and UDP:
Data sent to local sockets can be queued indefinitely if the receive
process does not read its socket. Unbound notification latency is not
acceptable. For this reason all packets generated with MSG_ZEROCOPY
that are looped to a local socket will incur a deferred copy. This
includes looping onto packet sockets (e.g., tcpdump) and tun devices.

+For VSOCK:
+Data path sent to local sockets is the same as for non-local sockets.

Testing
===
@@ -254,3 +260,6 @@ instance when run with msg_zerocopy.sh between a veth pair 
across
namespaces, the test will not show any improvement. For testing, the
loopback restriction can be temporarily relaxed by making
skb_orphan_frags_rx identical to skb_orphan_frags.
+
+For VSOCK type of socket example can be found in
+tools/testing/vsock/vsock_test_zerocopy.c.
--
2.25.1



___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH net-next v2 08/12] vsock: enable setting SO_ZEROCOPY

2023-10-03 Thread Stefano Garzarella

On Sun, Oct 01, 2023 at 12:03:04AM +0300, Arseniy Krasnov wrote:

For AF_VSOCK, zerocopy tx mode depends on transport, so this option must
be set in AF_VSOCK implementation where transport is accessible (if
transport is not set during setting SO_ZEROCOPY: for example socket is
not connected, then SO_ZEROCOPY will be enabled, but once transport will
be assigned, support of this type of transmission will be checked).

To handle SO_ZEROCOPY, AF_VSOCK implementation uses SOCK_CUSTOM_SOCKOPT
bit, thus handling SOL_SOCKET option operations, but all of them except
SO_ZEROCOPY will be forwarded to the generic handler by calling
'sock_setsockopt()'.

Signed-off-by: Arseniy Krasnov 
---
Changelog:
v1 -> v2:
 * Place 'sock_valbool_flag()' in a single line.

net/vmw_vsock/af_vsock.c | 45 ++--
1 file changed, 43 insertions(+), 2 deletions(-)


Reviewed-by: Stefano Garzarella 



diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index ff44bab05191..a84f242466cf 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1406,8 +1406,16 @@ static int vsock_connect(struct socket *sock, struct 
sockaddr *addr,
goto out;
}

-   if (vsock_msgzerocopy_allow(transport))
+   if (vsock_msgzerocopy_allow(transport)) {
set_bit(SOCK_SUPPORT_ZC, >sk_socket->flags);
+   } else if (sock_flag(sk, SOCK_ZEROCOPY)) {
+   /* If this option was set before 'connect()',
+* when transport was unknown, check that this
+* feature is supported here.
+*/
+   err = -EOPNOTSUPP;
+   goto out;
+   }

err = vsock_auto_bind(vsk);
if (err)
@@ -1643,7 +1651,7 @@ static int vsock_connectible_setsockopt(struct socket 
*sock,
const struct vsock_transport *transport;
u64 val;

-   if (level != AF_VSOCK)
+   if (level != AF_VSOCK && level != SOL_SOCKET)
return -ENOPROTOOPT;

#define COPY_IN(_v)   \
@@ -1666,6 +1674,33 @@ static int vsock_connectible_setsockopt(struct socket 
*sock,

transport = vsk->transport;

+   if (level == SOL_SOCKET) {
+   int zerocopy;
+
+   if (optname != SO_ZEROCOPY) {
+   release_sock(sk);
+   return sock_setsockopt(sock, level, optname, optval, 
optlen);
+   }
+
+   /* Use 'int' type here, because variable to
+* set this option usually has this type.
+*/
+   COPY_IN(zerocopy);
+
+   if (zerocopy < 0 || zerocopy > 1) {
+   err = -EINVAL;
+   goto exit;
+   }
+
+   if (transport && !vsock_msgzerocopy_allow(transport)) {
+   err = -EOPNOTSUPP;
+   goto exit;
+   }
+
+   sock_valbool_flag(sk, SOCK_ZEROCOPY, zerocopy);
+   goto exit;
+   }
+
switch (optname) {
case SO_VM_SOCKETS_BUFFER_SIZE:
COPY_IN(val);
@@ -2322,6 +2357,12 @@ static int vsock_create(struct net *net, struct socket 
*sock,
}
}

+   /* SOCK_DGRAM doesn't have 'setsockopt' callback set in its
+* proto_ops, so there is no handler for custom logic.
+*/
+   if (sock_type_connectible(sock->type))
+   set_bit(SOCK_CUSTOM_SOCKOPT, >sk_socket->flags);
+
vsock_insert_unbound(vsk);

return 0;
--
2.25.1



___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH net-next v2 02/12] vsock: read from socket's error queue

2023-10-03 Thread Stefano Garzarella

On Sun, Oct 01, 2023 at 12:02:58AM +0300, Arseniy Krasnov wrote:

This adds handling of MSG_ERRQUEUE input flag in receive call. This flag
is used to read socket's error queue instead of data queue. Possible
scenario of error queue usage is receiving completions for transmission
with MSG_ZEROCOPY flag. This patch also adds new defines: 'SOL_VSOCK'
and 'VSOCK_RECVERR'.

Signed-off-by: Arseniy Krasnov 
---
Changelog:
v1 -> v2:
 * Place new defines for userspace to the existing file 'vm_sockets.h'
   instead of creating new one.

include/linux/socket.h  | 1 +
include/uapi/linux/vm_sockets.h | 4 
net/vmw_vsock/af_vsock.c| 6 ++
3 files changed, 11 insertions(+)

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 39b74d83c7c4..cfcb7e2c3813 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -383,6 +383,7 @@ struct ucred {
#define SOL_MPTCP   284
#define SOL_MCTP285
#define SOL_SMC 286
+#define SOL_VSOCK  287

/* IPX options */
#define IPX_TYPE1
diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h
index c60ca33eac59..b1a66c1a7054 100644
--- a/include/uapi/linux/vm_sockets.h
+++ b/include/uapi/linux/vm_sockets.h
@@ -191,4 +191,8 @@ struct sockaddr_vm {

#define IOCTL_VM_SOCKETS_GET_LOCAL_CID  _IO(7, 0xb9)

+#define SOL_VSOCK  287
+
+#define VSOCK_RECVERR  1


Please add good documentation for both of them. This is an header
exposed to the user space.


+
#endif /* _UAPI_VM_SOCKETS_H */
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index d841f4de33b0..0365382beab6 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -110,6 +110,8 @@
#include 
#include 
#include 
+#include 
+#include 


Let's keep the alphabetic order as it was before this change.

`net/af_vsock.h` already includes the `uapi/linux/vm_sockets.h`,
and we also use several defines from it in this file, so you can also
skip it.

On the other end it would be better to directly include the headers that
we use, so it's also okay to keep it. As you prefer.



static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
static void vsock_sk_destruct(struct sock *sk);
@@ -2137,6 +2139,10 @@ vsock_connectible_recvmsg(struct socket *sock, struct 
msghdr *msg, size_t len,
int err;

sk = sock->sk;
+
+   if (unlikely(flags & MSG_ERRQUEUE))
+   return sock_recv_errqueue(sk, msg, len, SOL_VSOCK, 
VSOCK_RECVERR);
+
vsk = vsock_sk(sk);
err = 0;

--
2.25.1



___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH net-next v2 01/12] vsock: set EPOLLERR on non-empty error queue

2023-10-03 Thread Stefano Garzarella

On Sun, Oct 01, 2023 at 12:02:57AM +0300, Arseniy Krasnov wrote:

If socket's error queue is not empty, EPOLLERR must be set. Otherwise,
reader of error queue won't detect data in it using EPOLLERR bit.
Currently for AF_VSOCK this is actual only with MSG_ZEROCOPY, as this
feature is the only user of an error queue of the socket.

Signed-off-by: Arseniy Krasnov 
---
Changelog:
v1 -> v2:
 * Update commit message by removing 'fix' word.


Reviewed-by: Stefano Garzarella 



net/vmw_vsock/af_vsock.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 013b65241b65..d841f4de33b0 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1030,7 +1030,7 @@ static __poll_t vsock_poll(struct file *file, struct 
socket *sock,
poll_wait(file, sk_sleep(sk), wait);
mask = 0;

-   if (sk->sk_err)
+   if (sk->sk_err || !skb_queue_empty_lockless(>sk_error_queue))
/* Signify that there has been an error on this socket. */
mask |= EPOLLERR;

--
2.25.1



___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [LINUX KERNEL PATCH v5 1/2] virtio_pci: Add freeze_mode for virtio_pci_common_cfg

2023-10-03 Thread Michael S. Tsirkin
On Tue, Sep 19, 2023 at 06:46:06PM +0800, Jiqian Chen wrote:
> When guest vm does S3, Qemu will reset and clear some things of virtio
> devices, but guest can't aware that, so that may cause some problems.
> For excample, Qemu calls virtio_reset->virtio_gpu_gl_reset, that will
> destroy render resources of virtio-gpu. As a result, after guest resume,
> the display can't come back and we only saw a black screen. Due to guest
> can't re-create all the resources, so we need to let Qemu not to destroy
> them when S3.
> 
> For above purpose, this patch add a new parameter named freeze_mode to
> struct virtio_pci_common_cfg, and when guest suspends, it can set
> freeze_mode to be FREEZE_S3, so that virtio devices can change their
> reset behavior on Qemu side according to that mode.
> 
> Signed-off-by: Jiqian Chen 
> ---
>  drivers/virtio/virtio.c| 13 +
>  drivers/virtio/virtio_pci_modern.c |  9 +
>  drivers/virtio/virtio_pci_modern_dev.c | 16 
>  include/linux/virtio_config.h  |  1 +
>  include/linux/virtio_pci_modern.h  |  2 ++
>  include/uapi/linux/virtio_pci.h| 16 ++--
>  6 files changed, 55 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> index 3893dc29eb26..b4eb8369d5a1 100644
> --- a/drivers/virtio/virtio.c
> +++ b/drivers/virtio/virtio.c
> @@ -7,6 +7,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  /* Unique numbering for virtio devices. */
>  static DEFINE_IDA(virtio_index_ida);
> @@ -486,10 +487,20 @@ void unregister_virtio_device(struct virtio_device *dev)
>  EXPORT_SYMBOL_GPL(unregister_virtio_device);
>  
>  #ifdef CONFIG_PM_SLEEP
> +static void virtio_set_freeze_mode(struct virtio_device *dev, u16 mode)
> +{
> + if (!dev->config->set_freeze_mode)
> + return;
> + might_sleep();
> + dev->config->set_freeze_mode(dev, mode);
> +}
> +
>  int virtio_device_freeze(struct virtio_device *dev)
>  {
>   struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
>  
> + virtio_set_freeze_mode(dev, VIRTIO_PCI_FREEZE_MODE_FREEZE_S3);
> +
>   virtio_config_disable(dev);
>  
>   dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED;
> @@ -544,6 +555,8 @@ int virtio_device_restore(struct virtio_device *dev)
>  
>   virtio_config_enable(dev);
>  
> + virtio_set_freeze_mode(dev, VIRTIO_PCI_FREEZE_MODE_UNFREEZE);
> +
>   return 0;
>  
>  err:
> diff --git a/drivers/virtio/virtio_pci_modern.c 
> b/drivers/virtio/virtio_pci_modern.c
> index d6bb68ba84e5..846b70919cbd 100644
> --- a/drivers/virtio/virtio_pci_modern.c
> +++ b/drivers/virtio/virtio_pci_modern.c
> @@ -491,6 +491,13 @@ static bool vp_get_shm_region(struct virtio_device *vdev,
>   return true;
>  }
>  
> +static void vp_set_freeze_mode(struct virtio_device *vdev, u16 mode)
> +{
> + struct virtio_pci_device *vp_dev = to_vp_device(vdev);
> +
> + vp_modern_set_freeze_mode(_dev->mdev, mode);
> +}
> +
>  static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
>   .get= NULL,
>   .set= NULL,
> @@ -509,6 +516,7 @@ static const struct virtio_config_ops 
> virtio_pci_config_nodev_ops = {
>   .get_shm_region  = vp_get_shm_region,
>   .disable_vq_and_reset = vp_modern_disable_vq_and_reset,
>   .enable_vq_after_reset = vp_modern_enable_vq_after_reset,
> + .set_freeze_mode = vp_set_freeze_mode,
>  };
>  
>  static const struct virtio_config_ops virtio_pci_config_ops = {
> @@ -529,6 +537,7 @@ static const struct virtio_config_ops 
> virtio_pci_config_ops = {
>   .get_shm_region  = vp_get_shm_region,
>   .disable_vq_and_reset = vp_modern_disable_vq_and_reset,
>   .enable_vq_after_reset = vp_modern_enable_vq_after_reset,
> + .set_freeze_mode = vp_set_freeze_mode,
>  };
>  
>  /* the PCI probing function */
> diff --git a/drivers/virtio/virtio_pci_modern_dev.c 
> b/drivers/virtio/virtio_pci_modern_dev.c
> index aad7d9296e77..4a6f7d130b6e 100644
> --- a/drivers/virtio/virtio_pci_modern_dev.c
> +++ b/drivers/virtio/virtio_pci_modern_dev.c
> @@ -203,6 +203,8 @@ static inline void check_offsets(void)
>offsetof(struct virtio_pci_common_cfg, queue_used_lo));
>   BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_USEDHI !=
>offsetof(struct virtio_pci_common_cfg, queue_used_hi));
> + BUILD_BUG_ON(VIRTIO_PCI_COMMON_F_MODE !=
> +  offsetof(struct virtio_pci_common_cfg, freeze_mode));
>  }
>  
>  /*
> @@ -714,6 +716,20 @@ void __iomem *vp_modern_map_vq_notify(struct 
> virtio_pci_modern_device *mdev,
>  }
>  EXPORT_SYMBOL_GPL(vp_modern_map_vq_notify);
>  
> +/*
> + * vp_modern_set_freeze_mode - set freeze mode to device
> + * @mdev: the modern virtio-pci device
> + * @mode: the mode set to device
> + */
> +void vp_modern_set_freeze_mode(struct virtio_pci_modern_device *mdev,
> +  u16 mode)
> +{
> + 

Re: [PATCH net v2 5/6] virtio-net: fix the vq coalescing setting for vq resize

2023-10-03 Thread Paolo Abeni
On Mon, 2023-09-25 at 15:53 +0800, Heng Qi wrote:
> According to the definition of virtqueue coalescing spec[1]:
> 
>   Upon disabling and re-enabling a transmit virtqueue, the device MUST set
>   the coalescing parameters of the virtqueue to those configured through the
>   VIRTIO_NET_CTRL_NOTF_COAL_TX_SET command, or, if the driver did not set
>   any TX coalescing parameters, to 0.
> 
>   Upon disabling and re-enabling a receive virtqueue, the device MUST set
>   the coalescing parameters of the virtqueue to those configured through the
>   VIRTIO_NET_CTRL_NOTF_COAL_RX_SET command, or, if the driver did not set
>   any RX coalescing parameters, to 0.
> 
> We need to add this setting for vq resize (ethtool -G) where vq_reset happens.
> 
> [1] https://lists.oasis-open.org/archives/virtio-dev/202303/msg00415.html
> 
> Fixes: 394bd87764b6 ("virtio_net: support per queue interrupt coalesce 
> command")
> Cc: Gavin Li 
> Signed-off-by: Heng Qi 

@Jason, since you commented on v1, waiting for your ack.

> ---
>  drivers/net/virtio_net.c | 27 +++
>  1 file changed, 27 insertions(+)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 12ec3ae19b60..cb19b224419b 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -2855,6 +2855,9 @@ static void virtnet_get_ringparam(struct net_device 
> *dev,
>   ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq);
>  }
>  
> +static int virtnet_send_ctrl_coal_vq_cmd(struct virtnet_info *vi,
> +  u16 vqn, u32 max_usecs, u32 
> max_packets);
> +
>  static int virtnet_set_ringparam(struct net_device *dev,
>struct ethtool_ringparam *ring,
>struct kernel_ethtool_ringparam *kernel_ring,
> @@ -2890,12 +2893,36 @@ static int virtnet_set_ringparam(struct net_device 
> *dev,
>   err = virtnet_tx_resize(vi, sq, ring->tx_pending);
>   if (err)
>   return err;
> +
> + /* Upon disabling and re-enabling a transmit virtqueue, 
> the device must
> +  * set the coalescing parameters of the virtqueue to 
> those configured
> +  * through the VIRTIO_NET_CTRL_NOTF_COAL_TX_SET 
> command, or, if the driver
> +  * did not set any TX coalescing parameters, to 0.
> +  */
> + err = virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(i),
> + 
> vi->intr_coal_tx.max_usecs,
> + 
> vi->intr_coal_tx.max_packets);
> + if (err)
> + return err;
> + /* Save parameters */

As a very minor nit, I guess the comment could be dropped here (similar
to patch 4/6). @Heng Qi: please don't repost just for this, let's wait
for Jason' comments.

Cheers,

Paolo

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH vhost 10/16] vdpa/mlx5: Allow creation/deletion of any given mr struct

2023-10-03 Thread Dragos Tatulea via Virtualization
On Thu, 2023-09-28 at 19:45 +0300, Dragos Tatulea wrote:
> This patch adapts the mr creation/deletion code to be able to work with
> any given mr struct pointer. All the APIs are adapted to take an extra
> parameter for the mr.
> 
> mlx5_vdpa_create/delete_mr doesn't need a ASID parameter anymore. The
> check is done in the caller instead (mlx5_set_map).
> 
> This change is needed for a followup patch which will introduce an
> additional mr for the vq descriptor data.
> 
> Signed-off-by: Dragos Tatulea 
Gentle ping for the remaining patches in this series.
 
> ---
>  drivers/vdpa/mlx5/core/mlx5_vdpa.h |  8 +++--
>  drivers/vdpa/mlx5/core/mr.c    | 53 ++
>  drivers/vdpa/mlx5/net/mlx5_vnet.c  | 10 --
>  3 files changed, 36 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> index e1e6e7aba50e..01d4ee58ccb1 100644
> --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
> @@ -116,10 +116,12 @@ int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev,
> u32 *mkey, u32 *in,
>  int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, u32 mkey);
>  int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb
> *iotlb,
>  bool *change_map, unsigned int asid);
> -int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb
> *iotlb,
> -   unsigned int asid);
> +int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
> +   struct mlx5_vdpa_mr *mr,
> +   struct vhost_iotlb *iotlb);
>  void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev);
> -void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid);
> +void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev,
> + struct mlx5_vdpa_mr *mr);
>  int mlx5_vdpa_update_cvq_iotlb(struct mlx5_vdpa_dev *mvdev,
> struct vhost_iotlb *iotlb,
> unsigned int asid);
> diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
> index 00dcce190a1f..6f29e8eaabb1 100644
> --- a/drivers/vdpa/mlx5/core/mr.c
> +++ b/drivers/vdpa/mlx5/core/mr.c
> @@ -301,10 +301,13 @@ static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev,
> struct mlx5_vdpa_direct
> sg_free_table(>sg_head);
>  }
>  
> -static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, u64 start, u64 size,
> u8 perm,
> +static int add_direct_chain(struct mlx5_vdpa_dev *mvdev,
> +   struct mlx5_vdpa_mr *mr,
> +   u64 start,
> +   u64 size,
> +   u8 perm,
>     struct vhost_iotlb *iotlb)
>  {
> -   struct mlx5_vdpa_mr *mr = >mr;
> struct mlx5_vdpa_direct_mr *dmr;
> struct mlx5_vdpa_direct_mr *n;
> LIST_HEAD(tmp);
> @@ -354,9 +357,10 @@ static int add_direct_chain(struct mlx5_vdpa_dev *mvdev,
> u64 start, u64 size, u8
>   * indirect memory key that provides access to the enitre address space given
>   * by iotlb.
>   */
> -static int create_user_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb
> *iotlb)
> +static int create_user_mr(struct mlx5_vdpa_dev *mvdev,
> + struct mlx5_vdpa_mr *mr,
> + struct vhost_iotlb *iotlb)
>  {
> -   struct mlx5_vdpa_mr *mr = >mr;
> struct mlx5_vdpa_direct_mr *dmr;
> struct mlx5_vdpa_direct_mr *n;
> struct vhost_iotlb_map *map;
> @@ -384,7 +388,7 @@ static int create_user_mr(struct mlx5_vdpa_dev *mvdev,
> struct vhost_iotlb *iotlb
>   
> LOG_MAX_KLM_SIZE);
> mr->num_klms += nnuls;
> }
> -   err = add_direct_chain(mvdev, ps, pe - ps,
> pperm, iotlb);
> +   err = add_direct_chain(mvdev, mr, ps, pe - ps,
> pperm, iotlb);
> if (err)
> goto err_chain;
> }
> @@ -393,7 +397,7 @@ static int create_user_mr(struct mlx5_vdpa_dev *mvdev,
> struct vhost_iotlb *iotlb
> pperm = map->perm;
> }
> }
> -   err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb);
> +   err = add_direct_chain(mvdev, mr, ps, pe - ps, pperm, iotlb);
> if (err)
> goto err_chain;
>  
> @@ -489,13 +493,8 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev,
> struct mlx5_vdpa_mr *mr
> }
>  }
>  
> -static void _mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev, unsigned int
> asid)
> +static void _mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev, struct
> mlx5_vdpa_mr *mr)
>  {
> -   struct mlx5_vdpa_mr *mr = >mr;
> -
> -   if