On 17.08.2022 08:01, Arseniy Krasnov wrote:
> On 16.08.2022 05:32, Bobby Eshleman wrote:
>> CC'ing [email protected]
>>
>> On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote:
>>> This patch supports dgram in virtio and on the vhost side.
> Hello,
> 
> sorry, I don't understand how this maintains message boundaries. Or is
> that unnecessary for SOCK_DGRAM?
> 
> Thanks
>>>
>>> Signed-off-by: Jiang Wang <[email protected]>
>>> Signed-off-by: Bobby Eshleman <[email protected]>
>>> ---
>>>  drivers/vhost/vsock.c                   |   2 +-
>>>  include/net/af_vsock.h                  |   2 +
>>>  include/uapi/linux/virtio_vsock.h       |   1 +
>>>  net/vmw_vsock/af_vsock.c                |  26 +++-
>>>  net/vmw_vsock/virtio_transport.c        |   2 +-
>>>  net/vmw_vsock/virtio_transport_common.c | 173 ++++++++++++++++++++++--
>>>  6 files changed, 186 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
>>> index a5d1bdb786fe..3dc72a5647ca 100644
>>> --- a/drivers/vhost/vsock.c
>>> +++ b/drivers/vhost/vsock.c
>>> @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void)
>>>     int ret;
>>>  
>>>     ret = vsock_core_register(&vhost_transport.transport,
>>> -                             VSOCK_TRANSPORT_F_H2G);
>>> +                             VSOCK_TRANSPORT_F_H2G | 
>>> VSOCK_TRANSPORT_F_DGRAM);
>>>     if (ret < 0)
>>>             return ret;
>>>  
>>> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
>>> index 1c53c4c4d88f..37e55c81e4df 100644
>>> --- a/include/net/af_vsock.h
>>> +++ b/include/net/af_vsock.h
>>> @@ -78,6 +78,8 @@ struct vsock_sock {
>>>  s64 vsock_stream_has_data(struct vsock_sock *vsk);
>>>  s64 vsock_stream_has_space(struct vsock_sock *vsk);
>>>  struct sock *vsock_create_connected(struct sock *parent);
>>> +int vsock_bind_stream(struct vsock_sock *vsk,
>>> +                 struct sockaddr_vm *addr);
>>>  
>>>  /**** TRANSPORT ****/
>>>  
>>> diff --git a/include/uapi/linux/virtio_vsock.h 
>>> b/include/uapi/linux/virtio_vsock.h
>>> index 857df3a3a70d..0975b9c88292 100644
>>> --- a/include/uapi/linux/virtio_vsock.h
>>> +++ b/include/uapi/linux/virtio_vsock.h
>>> @@ -70,6 +70,7 @@ struct virtio_vsock_hdr {
>>>  enum virtio_vsock_type {
>>>     VIRTIO_VSOCK_TYPE_STREAM = 1,
>>>     VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
>>> +   VIRTIO_VSOCK_TYPE_DGRAM = 3,
>>>  };
>>>  
>>>  enum virtio_vsock_op {
>>> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
>>> index 1893f8aafa48..87e4ae1866d3 100644
>>> --- a/net/vmw_vsock/af_vsock.c
>>> +++ b/net/vmw_vsock/af_vsock.c
>>> @@ -675,6 +675,19 @@ static int __vsock_bind_connectible(struct vsock_sock 
>>> *vsk,
>>>     return 0;
>>>  }
>>>  
>>> +int vsock_bind_stream(struct vsock_sock *vsk,
>>> +                 struct sockaddr_vm *addr)
>>> +{
>>> +   int retval;
>>> +
>>> +   spin_lock_bh(&vsock_table_lock);
>>> +   retval = __vsock_bind_connectible(vsk, addr);
>>> +   spin_unlock_bh(&vsock_table_lock);
>>> +
>>> +   return retval;
>>> +}
>>> +EXPORT_SYMBOL(vsock_bind_stream);
>>> +
>>>  static int __vsock_bind_dgram(struct vsock_sock *vsk,
>>>                           struct sockaddr_vm *addr)
>>>  {
>>> @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct 
>>> vsock_transport *t, int features)
>>>     }
>>>  
>>>     if (features & VSOCK_TRANSPORT_F_DGRAM) {
>>> -           if (t_dgram) {
>>> -                   err = -EBUSY;
>>> -                   goto err_busy;
>>> +           /* TODO: always chose the G2H variant over others, support 
>>> nesting later */
>>> +           if (features & VSOCK_TRANSPORT_F_G2H) {
>>> +                   if (t_dgram)
>>> +                           pr_warn("virtio_vsock: t_dgram already set\n");
>>> +                   t_dgram = t;
>>> +           }
>>> +
>>> +           if (!t_dgram) {
>>> +                   t_dgram = t;
>>>             }
>>> -           t_dgram = t;
>>>     }
>>>  
>>>     if (features & VSOCK_TRANSPORT_F_LOCAL) {
>>> diff --git a/net/vmw_vsock/virtio_transport.c 
>>> b/net/vmw_vsock/virtio_transport.c
>>> index 073314312683..d4526ca462d2 100644
>>> --- a/net/vmw_vsock/virtio_transport.c
>>> +++ b/net/vmw_vsock/virtio_transport.c
>>> @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void)
>>>             return -ENOMEM;
>>>  
>>>     ret = vsock_core_register(&virtio_transport.transport,
>>> -                             VSOCK_TRANSPORT_F_G2H);
>>> +                             VSOCK_TRANSPORT_F_G2H | 
>>> VSOCK_TRANSPORT_F_DGRAM);
>>>     if (ret)
>>>             goto out_wq;
>>>  
>>> diff --git a/net/vmw_vsock/virtio_transport_common.c 
>>> b/net/vmw_vsock/virtio_transport_common.c
>>> index bdf16fff054f..aedb48728677 100644
>>> --- a/net/vmw_vsock/virtio_transport_common.c
>>> +++ b/net/vmw_vsock/virtio_transport_common.c
>>> @@ -229,7 +229,9 @@ EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
>>>  
>>>  static u16 virtio_transport_get_type(struct sock *sk)
>>>  {
>>> -   if (sk->sk_type == SOCK_STREAM)
>>> +   if (sk->sk_type == SOCK_DGRAM)
>>> +           return VIRTIO_VSOCK_TYPE_DGRAM;
>>> +   else if (sk->sk_type == SOCK_STREAM)
>>>             return VIRTIO_VSOCK_TYPE_STREAM;
>>>     else
>>>             return VIRTIO_VSOCK_TYPE_SEQPACKET;
>>> @@ -287,22 +289,29 @@ static int virtio_transport_send_pkt_info(struct 
>>> vsock_sock *vsk,
>>>     vvs = vsk->trans;
>>>  
>>>     /* we can send less than pkt_len bytes */
>>> -   if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
>>> -           pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
>>> +   if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
>>> +           if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
>>> +                   pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
>>> +           else
>>> +                   return 0;
>>> +   }
>>>  
>>> -   /* virtio_transport_get_credit might return less than pkt_len credit */
>>> -   pkt_len = virtio_transport_get_credit(vvs, pkt_len);
>>> +   if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) {
>>> +           /* virtio_transport_get_credit might return less than pkt_len 
>>> credit */
>>> +           pkt_len = virtio_transport_get_credit(vvs, pkt_len);
>>>  
>>> -   /* Do not send zero length OP_RW pkt */
>>> -   if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
>>> -           return pkt_len;
>>> +           /* Do not send zero length OP_RW pkt */
>>> +           if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
>>> +                   return pkt_len;
>>> +   }
>>>  
>>>     skb = virtio_transport_alloc_skb(info, pkt_len,
>>>                                      src_cid, src_port,
>>>                                      dst_cid, dst_port,
>>>                                      &err);
>>>     if (!skb) {
>>> -           virtio_transport_put_credit(vvs, pkt_len);
>>> +           if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
>>> +                   virtio_transport_put_credit(vvs, pkt_len);
>>>             return err;
>>>     }
>>>  
>>> @@ -586,6 +595,61 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock 
>>> *vsk,
>>>  }
>>>  EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
>>>  
>>> +static ssize_t
>>> +virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk,
>>> +                             struct msghdr *msg, size_t len)
>>> +{
>>> +   struct virtio_vsock_sock *vvs = vsk->trans;
>>> +   struct sk_buff *skb;
>>> +   size_t total = 0;
>>> +   u32 free_space;
>>> +   int err = -EFAULT;
>>> +
>>> +   spin_lock_bh(&vvs->rx_lock);
>>> +   if (total < len && !skb_queue_empty_lockless(&vvs->rx_queue)) {
>>> +           skb = __skb_dequeue(&vvs->rx_queue);
>>> +
>>> +           total = len;
>>> +           if (total > skb->len - vsock_metadata(skb)->off)
>>> +                   total = skb->len - vsock_metadata(skb)->off;
>>> +           else if (total < skb->len - vsock_metadata(skb)->off)
>>> +                   msg->msg_flags |= MSG_TRUNC;
>>> +
>>> +           /* sk_lock is held by caller so no one else can dequeue.
>>> +            * Unlock rx_lock since memcpy_to_msg() may sleep.
>>> +            */
>>> +           spin_unlock_bh(&vvs->rx_lock);
>>> +
>>> +           err = memcpy_to_msg(msg, skb->data + vsock_metadata(skb)->off, 
>>> total);
>>> +           if (err)
>>> +                   return err;
>>> +
>>> +           spin_lock_bh(&vvs->rx_lock);
>>> +
>>> +           virtio_transport_dec_rx_pkt(vvs, skb);
>>> +           consume_skb(skb);
>>> +   }
>>> +
>>> +   free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);
>>> +
>>> +   spin_unlock_bh(&vvs->rx_lock);
>>> +
>>> +   if (total > 0 && msg->msg_name) {
>>> +           /* Provide the address of the sender. */
>>> +           DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name);
>>> +
>>> +           vsock_addr_init(vm_addr, le64_to_cpu(vsock_hdr(skb)->src_cid),
>>> +                           le32_to_cpu(vsock_hdr(skb)->src_port));
>>> +           msg->msg_namelen = sizeof(*vm_addr);
>>> +   }
>>> +   return total;
>>> +}
>>> +
>>> +static s64 virtio_transport_dgram_has_data(struct vsock_sock *vsk)
>>> +{
>>> +   return virtio_transport_stream_has_data(vsk);
>>> +}
>>> +
>>>  int
>>>  virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
>>>                                struct msghdr *msg,
>>> @@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
>>>                            struct msghdr *msg,
>>>                            size_t len, int flags)
>>>  {
>>> -   return -EOPNOTSUPP;
>>> +   struct sock *sk;
>>> +   size_t err = 0;
>>> +   long timeout;
>>> +
>>> +   DEFINE_WAIT(wait);
>>> +
>>> +   sk = &vsk->sk;
>>> +   err = 0;
>>> +
>>> +   if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags & MSG_PEEK)
>>> +           return -EOPNOTSUPP;
>>> +
>>> +   lock_sock(sk);
>>> +
>>> +   if (!len)
>>> +           goto out;
>>> +
>>> +   timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
>>> +
>>> +   while (1) {
>>> +           s64 ready;
>>> +
>>> +           prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
>>> +           ready = virtio_transport_dgram_has_data(vsk);
>>> +
>>> +           if (ready == 0) {
>>> +                   if (timeout == 0) {
>>> +                           err = -EAGAIN;
>>> +                           finish_wait(sk_sleep(sk), &wait);
>>> +                           break;
>>> +                   }
>>> +
>>> +                   release_sock(sk);
>>> +                   timeout = schedule_timeout(timeout);
>>> +                   lock_sock(sk);
>>> +
>>> +                   if (signal_pending(current)) {
>>> +                           err = sock_intr_errno(timeout);
>>> +                           finish_wait(sk_sleep(sk), &wait);
>>> +                           break;
>>> +                   } else if (timeout == 0) {
>>> +                           err = -EAGAIN;
>>> +                           finish_wait(sk_sleep(sk), &wait);
>>> +                           break;
>>> +                   }
>>> +           } else {
>>> +                   finish_wait(sk_sleep(sk), &wait);
>>> +
>>> +                   if (ready < 0) {
>>> +                           err = -ENOMEM;
>>> +                           goto out;
>>> +                   }
>>> +
>>> +                   err = virtio_transport_dgram_do_dequeue(vsk, msg, len);
>>> +                   break;
>>> +           }
>>> +   }
>>> +out:
>>> +   release_sock(sk);
>>> +   return err;
>>>  }
>>>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
^^^
Maybe this generic data-waiting logic should live in af_vsock.c, as it does
for stream/seqpacket?
That way, another transport that supports SOCK_DGRAM could reuse it.
>>>  
>>> @@ -819,13 +942,13 @@ EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
>>>  int virtio_transport_dgram_bind(struct vsock_sock *vsk,
>>>                             struct sockaddr_vm *addr)
>>>  {
>>> -   return -EOPNOTSUPP;
>>> +   return vsock_bind_stream(vsk, addr);
>>>  }
>>>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);
>>>  
>>>  bool virtio_transport_dgram_allow(u32 cid, u32 port)
>>>  {
>>> -   return false;
>>> +   return true;
>>>  }
>>>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
>>>  
>>> @@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
>>>                            struct msghdr *msg,
>>>                            size_t dgram_len)
>>>  {
>>> -   return -EOPNOTSUPP;
>>> +   struct virtio_vsock_pkt_info info = {
>>> +           .op = VIRTIO_VSOCK_OP_RW,
>>> +           .msg = msg,
>>> +           .pkt_len = dgram_len,
>>> +           .vsk = vsk,
>>> +           .remote_cid = remote_addr->svm_cid,
>>> +           .remote_port = remote_addr->svm_port,
>>> +   };
>>> +
>>> +   return virtio_transport_send_pkt_info(vsk, &info);
>>>  }
>>>  EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
>>>  
>>> @@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct sock *sk,
>>>     struct virtio_vsock_hdr *hdr = vsock_hdr(skb);
>>>     int err = 0;
>>>  
>>> +   if (le16_to_cpu(vsock_hdr(skb)->type) == VIRTIO_VSOCK_TYPE_DGRAM) {
>>> +           virtio_transport_recv_enqueue(vsk, skb);
>>> +           sk->sk_data_ready(sk);
>>> +           return err;
>>> +   }
>>> +
>>>     switch (le16_to_cpu(hdr->op)) {
>>>     case VIRTIO_VSOCK_OP_RW:
>>>             virtio_transport_recv_enqueue(vsk, skb);
>>> @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct sock *sk, struct 
>>> sk_buff *skb,
>>>  static bool virtio_transport_valid_type(u16 type)
>>>  {
>>>     return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
>>> -          (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
>>> +          (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
>>> +          (type == VIRTIO_VSOCK_TYPE_DGRAM);
>>>  }
>>>  
>>>  /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's 
>>> vq->mutex
>>> @@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct 
>>> virtio_transport *t,
>>>             goto free_pkt;
>>>     }
>>>  
>>> +   if (sk->sk_type == SOCK_DGRAM) {
>>> +           virtio_transport_recv_connected(sk, skb);
>>> +           goto out;
>>> +   }
>>> +
>>>     space_available = virtio_transport_space_update(sk, skb);
>>>  
>>>     /* Update CID in case it has changed after a transport reset event */
>>> @@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct 
>>> virtio_transport *t,
>>>             break;
>>>     }
>>>  
>>> +out:
>>>     release_sock(sk);
>>>  
>>>     /* Release refcnt obtained when we fetched this socket out of the
>>> -- 
>>> 2.35.1
>>>
>>
>> ---------------------------------------------------------------------
>> To unsubscribe, e-mail: [email protected]
>> For additional commands, e-mail: [email protected]
>>
> 

Reply via email to