On 08/06/2026 12:37, David Laight wrote:
> On Mon, 8 Jun 2026 11:10:24 +0300
> Arseniy Krasnov <[email protected]> wrote:
>
>> On 05/06/2026 18:08, David Laight wrote:
>>> On Fri,  5 Jun 2026 14:53:14 +0300
>>> Arseniy Krasnov <[email protected]> wrote:
>>>  
>>>> Logically it was based on TCP implementation, so to make further
>>>> support easier, rewrite it in the TCP way.
>>>>
>>>> Signed-off-by: Arseniy Krasnov <[email protected]>
>>>> ---
>>>>  net/vmw_vsock/virtio_transport_common.c | 64 ++++++++++++-------------
>>>>  1 file changed, 32 insertions(+), 32 deletions(-)
>>>>
>>>> diff --git a/net/vmw_vsock/virtio_transport_common.c 
>>>> b/net/vmw_vsock/virtio_transport_common.c
>>>> index 2fd9eaaf5ca6..00caeeaa5590 100644
>>>> --- a/net/vmw_vsock/virtio_transport_common.c
>>>> +++ b/net/vmw_vsock/virtio_transport_common.c
>>>> @@ -73,10 +73,13 @@ static bool virtio_transport_can_zcopy(const struct 
>>>> virtio_transport *t_ops,
>>>>  static int virtio_transport_fill_skb(struct sk_buff *skb,
>>>>                                 struct virtio_vsock_pkt_info *info,
>>>>                                 size_t len,
>>>> -                               bool zcopy)
>>>> +                               bool zcopy, struct ubuf_info *uarg)
>>>>  {
>>>>    struct msghdr *msg = info->msg;
>>>>  
>>>> +  /* We have completion - attach it to 'skb'. */
>>>> +  skb_zcopy_set(skb, uarg, NULL);
>>>> +
>>>>    if (zcopy)
>>>>            return __zerocopy_sg_from_iter(msg, NULL, skb,
>>>>                                           &msg->msg_iter, len, NULL);
>>>> @@ -208,7 +211,8 @@ static struct sk_buff 
>>>> *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *
>>>>                                              u32 src_cid,
>>>>                                              u32 src_port,
>>>>                                              u32 dst_cid,
>>>> -                                            u32 dst_port)
>>>> +                                            u32 dst_port,
>>>> +                                            struct ubuf_info *uarg)
>>>>  {
>>>>    struct vsock_sock *vsk;
>>>>    struct sk_buff *skb;
>>>> @@ -245,7 +249,7 @@ static struct sk_buff 
>>>> *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *
>>>>    if (info->msg && payload_len > 0) {
>>>>            int err;
>>>>  
>>>> -          err = virtio_transport_fill_skb(skb, info, payload_len, zcopy);
>>>> +          err = virtio_transport_fill_skb(skb, info, payload_len, zcopy, 
>>>> uarg);
>>>>            if (err)
>>>>                    goto out;
>>>>  
>>>> @@ -321,38 +325,36 @@ static int virtio_transport_send_pkt_info(struct 
>>>> vsock_sock *vsk,
>>>>    if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
>>>>            return pkt_len;
>>>>  
>>>> -  if (info->msg) {
>>>> -          /* If zerocopy is not enabled by 'setsockopt()', we behave as
>>>> -           * there is no MSG_ZEROCOPY flag set.
>>>> +  if (info->msg && (info->msg->msg_flags & MSG_ZEROCOPY)) {
>>>> +          /* If 'info->msg' is not NULL, this is only VIRTIO_VSOCK_OP_RW.
>>>> +           * 'MSG_ZEROCOPY' flag handling here is based on the same flag
>>>> +           * handling from 'tcp_sendmsg_locked()'.
>>>>             */
>>>> -          if (!sock_flag(sk_vsock(vsk), SOCK_ZEROCOPY))
>>>> -                  info->msg->msg_flags &= ~MSG_ZEROCOPY;
>>>> +          if (info->msg->msg_ubuf) {
>>>> +                  uarg = info->msg->msg_ubuf;
>>>> +                  can_zcopy = virtio_transport_can_zcopy(t_ops, info, 
>>>> pkt_len);
>>>> +          } else if (sock_flag(sk_vsock(vsk), SOCK_ZEROCOPY)) {
>>>> +                  uarg = msg_zerocopy_realloc(sk_vsock(vsk), pkt_len,
>>>> +                                              NULL, false);
>>>> +                  if (!uarg) {
>>>> +                          virtio_transport_put_credit(vvs, pkt_len);
>>>> +                          return -ENOMEM;
>>>> +                  }
>>>>  
>>>> -          if (info->msg->msg_flags & MSG_ZEROCOPY)
>>>>                    can_zcopy = virtio_transport_can_zcopy(t_ops, info, 
>>>> pkt_len);
>>>>  
>>>> +                  if (!can_zcopy)
>>>> +                          uarg_to_msgzc(uarg)->zerocopy = 0;
>>>> +
>>>> +                  have_uref = true;
>>>> +          }
>>>> +
>>>> +          /* 'can_zcopy' means that this transmission will be
>>>> +           * in zerocopy way (e.g. using 'frags' array).
>>>> +           */  
>>> I've not looked at the tcp code, but the above doesn't look right.
>>> I don't see why msg->msg_ubuf might be non-NULL without SOCK_ZEROCOPY set.
>>> That would give the outer code a callback when the last skb is freed but
>>> still copy the data.  
>> Hi, 
>>
>> I guess case when 'msg->msg_ubuf' is non-NULL is special case today for 
>> io_uring MSG_ZEROCOPY implementation.
>> It was added here 
>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=eb315a7d1396b1139fc7daea55f2d3191e8e7092
>> As I see implementation of its tests in 
>> tools/testing/selftests/net/io_uring_zerocopy_tx.c , it doesn't require 
>> setting SOCK_ZEROCOPY option for
>> socket, so for virtio vsock case I just copied same logic to maintain 
>> compatibility, because there is MSG_ZEROCOPY io_uring test for virtio/vsock.
> That code path probably sets info->msg->msg_flags & MSG_ZEROCOPY so wants 
> 'zerocopy'.
> But wants it's own callback function called rather than one that 
> msg_zerocopy_realloc()
> adds.
>
> But there is no reason why a caller might not just want a notification that
> all the skb associated with the sendmsg have been freed without requesting 
> zerocopy.
> Maybe no one does it today, but it is trivial to support.

Caller for this path is io_uring today. It uses own callback 
'io_tx_ubuf_complete()' which implements notification using io_uring
achitecture instead of classic MSG_ERRQUEUE socket read way.

>
>>
>>> I also don't see the point of calling msg_zerocopy_realloc() to get a
>>> callback when the last skb is freed and then setting
>>>     uarg_to_msgzc(uarg)->zerocopy = 0;
>>> so that the callback doesn't actually do anything.
>>> It isn't as though you 'find out' later on that you can't actually do
>>> zerocopy.  
>>
>> Sorry, what do You mean "last skb" ? In this code we first allocate uarg 
>> (allocate, because third arg is always NULL). Then in
>> loop we allocate sk_buffs, fill it with data and send. I mean first/last skb 
>> will be freed after uarg is already allocated and we
>> don't touch it. I think i didn't understand Your question here.
> The 'uarg' is referenced by all of the skb that contain data for the 
> sendmsg().
> So when the last one of them is freed the callback function is called.
> The purpose of that callback is to 'undo' the zerocopy (page pinning etc).
> But when you set uarg_to_msgzc(uarg)->zerocopy = 0 the callback does nothing.
> So there is no point setting up the callback at all.

Pages are unpinned in sk_buff freeing logic: 'skb_release_data()' -> 
'__skb_frag_unref()'.  'zerocopy' flag of uarg shows 
caller that real zerocopy was used of not - it is checked in 
'__msg_zerocopy_callback()' and 'SO_EE_CODE_ZEROCOPY_COPIED' is set in
the 'ee_code' field of struct 'sock_extended_err' which is read by user. I mean 
idea of MSG_ZEROCOPY API is that if kernel doesn't
return error from 'sendmsg()' call with MSG_ZEROCOPY flag passed, it will send 
notification about data tx complete anyway - it doesn't
matter that real zercopy was done or not.

Thanks


>
> -- David
>
>>
>>>  
>>>>            if (can_zcopy)
>>>>                    max_skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE,
>>>>                                        (MAX_SKB_FRAGS * PAGE_SIZE));
>>>> -
>>>> -          if (info->msg->msg_flags & MSG_ZEROCOPY &&
>>>> -              info->op == VIRTIO_VSOCK_OP_RW) {
>>>> -                  uarg = info->msg->msg_ubuf;
>>>> -
>>>> -                  if (!uarg) {
>>>> -                          uarg = msg_zerocopy_realloc(sk_vsock(vsk),
>>>> -                                                      pkt_len, NULL, 
>>>> false);
>>>> -                          if (!uarg) {
>>>> -                                  virtio_transport_put_credit(vvs, 
>>>> pkt_len);
>>>> -                                  return -ENOMEM;
>>>> -                          }
>>>> -
>>>> -                          if (!can_zcopy)
>>>> -                                  uarg_to_msgzc(uarg)->zerocopy = 0;
>>>> -
>>>> -                          have_uref = true;
>>>> -                  }
>>>> -          }
>>>>    }
>>>>  
>>>>    rest_len = pkt_len;
>>>> @@ -365,14 +367,12 @@ static int virtio_transport_send_pkt_info(struct 
>>>> vsock_sock *vsk,
>>>>  
>>>>            skb = virtio_transport_alloc_skb(info, skb_len, can_zcopy,
>>>>                                             src_cid, src_port,
>>>> -                                           dst_cid, dst_port);
>>>> +                                           dst_cid, dst_port, uarg);
>>>>            if (!skb) {
>>>>                    ret = -ENOMEM;
>>>>                    break;
>>>>            }
>>>>  
>>>> -          skb_zcopy_set(skb, uarg, NULL);  
>>> Aren't you passing uarg through two function calls instead of doing it here.
>>> Doesn't even make it clearer what is going on.  
>>
>> Agree, to simplify patch, uarg could be set earlier (without passing it to 
>> functions) I guess.
>>
>> Thanks
>>
>>
>>> -- David
>>>  
>>>> -
>>>>            virtio_transport_inc_tx_pkt(vvs, skb);
>>>>  
>>>>            ret = t_ops->send_pkt(skb, info->net);
>>>> @@ -1178,7 +1178,7 @@ static int virtio_transport_reset_no_sock(const 
>>>> struct virtio_transport *t,
>>>>                                       le64_to_cpu(hdr->dst_cid),
>>>>                                       le32_to_cpu(hdr->dst_port),
>>>>                                       le64_to_cpu(hdr->src_cid),
>>>> -                                     le32_to_cpu(hdr->src_port));
>>>> +                                     le32_to_cpu(hdr->src_port), NULL);
>>>>    if (!reply)
>>>>            return -ENOMEM;
>>>>    

Reply via email to