On Tue, 2022-08-16 at 09:58 +0000, Bobby Eshleman wrote:
> On Wed, Aug 17, 2022 at 05:42:08AM +0000, Arseniy Krasnov wrote:
> > On 17.08.2022 08:01, Arseniy Krasnov wrote:
> > > On 16.08.2022 05:32, Bobby Eshleman wrote:
> > > > CC'ing [email protected]
> > > > 
> > > > On Mon, Aug 15, 2022 at 10:56:08AM -0700, Bobby Eshleman wrote:
> > > > > This patch supports dgram in virtio and on the vhost side.
> > > Hello,
> > > 
> > > Sorry, I don't understand: how does this maintain message boundaries?
> > > Or is that unnecessary for SOCK_DGRAM?
> > > 
> > > Thanks
> > > > > Signed-off-by: Jiang Wang <[email protected]>
> > > > > Signed-off-by: Bobby Eshleman <[email protected]>
> > > > > ---
> > > > >  drivers/vhost/vsock.c                   |   2 +-
> > > > >  include/net/af_vsock.h                  |   2 +
> > > > >  include/uapi/linux/virtio_vsock.h       |   1 +
> > > > >  net/vmw_vsock/af_vsock.c                |  26 +++-
> > > > >  net/vmw_vsock/virtio_transport.c        |   2 +-
> > > > >  net/vmw_vsock/virtio_transport_common.c | 173
> > > > > ++++++++++++++++++++++--
> > > > >  6 files changed, 186 insertions(+), 20 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> > > > > index a5d1bdb786fe..3dc72a5647ca 100644
> > > > > --- a/drivers/vhost/vsock.c
> > > > > +++ b/drivers/vhost/vsock.c
> > > > > @@ -925,7 +925,7 @@ static int __init vhost_vsock_init(void)
> > > > >       int ret;
> > > > >  
> > > > >       ret = vsock_core_register(&vhost_transport.transport,
> > > > > -                               VSOCK_TRANSPORT_F_H2G);
> > > > > +                               VSOCK_TRANSPORT_F_H2G |
> > > > > VSOCK_TRANSPORT_F_DGRAM);
> > > > >       if (ret < 0)
> > > > >               return ret;
> > > > >  
> > > > > diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
> > > > > index 1c53c4c4d88f..37e55c81e4df 100644
> > > > > --- a/include/net/af_vsock.h
> > > > > +++ b/include/net/af_vsock.h
> > > > > @@ -78,6 +78,8 @@ struct vsock_sock {
> > > > >  s64 vsock_stream_has_data(struct vsock_sock *vsk);
> > > > >  s64 vsock_stream_has_space(struct vsock_sock *vsk);
> > > > >  struct sock *vsock_create_connected(struct sock *parent);
> > > > > +int vsock_bind_stream(struct vsock_sock *vsk,
> > > > > +                   struct sockaddr_vm *addr);
> > > > >  
> > > > >  /**** TRANSPORT ****/
> > > > >  
> > > > > diff --git a/include/uapi/linux/virtio_vsock.h
> > > > > b/include/uapi/linux/virtio_vsock.h
> > > > > index 857df3a3a70d..0975b9c88292 100644
> > > > > --- a/include/uapi/linux/virtio_vsock.h
> > > > > +++ b/include/uapi/linux/virtio_vsock.h
> > > > > @@ -70,6 +70,7 @@ struct virtio_vsock_hdr {
> > > > >  enum virtio_vsock_type {
> > > > >       VIRTIO_VSOCK_TYPE_STREAM = 1,
> > > > >       VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
> > > > > +     VIRTIO_VSOCK_TYPE_DGRAM = 3,
> > > > >  };
> > > > >  
> > > > >  enum virtio_vsock_op {
> > > > > diff --git a/net/vmw_vsock/af_vsock.c
> > > > > b/net/vmw_vsock/af_vsock.c
> > > > > index 1893f8aafa48..87e4ae1866d3 100644
> > > > > --- a/net/vmw_vsock/af_vsock.c
> > > > > +++ b/net/vmw_vsock/af_vsock.c
> > > > > @@ -675,6 +675,19 @@ static int
> > > > > __vsock_bind_connectible(struct vsock_sock *vsk,
> > > > >       return 0;
> > > > >  }
> > > > >  
> > > > > +int vsock_bind_stream(struct vsock_sock *vsk,
> > > > > +                   struct sockaddr_vm *addr)
> > > > > +{
> > > > > +     int retval;
> > > > > +
> > > > > +     spin_lock_bh(&vsock_table_lock);
> > > > > +     retval = __vsock_bind_connectible(vsk, addr);
> > > > > +     spin_unlock_bh(&vsock_table_lock);
> > > > > +
> > > > > +     return retval;
> > > > > +}
> > > > > +EXPORT_SYMBOL(vsock_bind_stream);
> > > > > +
> > > > >  static int __vsock_bind_dgram(struct vsock_sock *vsk,
> > > > >                             struct sockaddr_vm *addr)
> > > > >  {
> > > > > @@ -2363,11 +2376,16 @@ int vsock_core_register(const struct
> > > > > vsock_transport *t, int features)
> > > > >       }
> > > > >  
> > > > >       if (features & VSOCK_TRANSPORT_F_DGRAM) {
> > > > > -             if (t_dgram) {
> > > > > -                     err = -EBUSY;
> > > > > -                     goto err_busy;
> > > > > +             /* TODO: always chose the G2H variant over
> > > > > others, support nesting later */
> > > > > +             if (features & VSOCK_TRANSPORT_F_G2H) {
> > > > > +                     if (t_dgram)
> > > > > +                             pr_warn("virtio_vsock: t_dgram
> > > > > already set\n");
> > > > > +                     t_dgram = t;
> > > > > +             }
> > > > > +
> > > > > +             if (!t_dgram) {
> > > > > +                     t_dgram = t;
> > > > >               }
> > > > > -             t_dgram = t;
> > > > >       }
> > > > >  
> > > > >       if (features & VSOCK_TRANSPORT_F_LOCAL) {
> > > > > diff --git a/net/vmw_vsock/virtio_transport.c
> > > > > b/net/vmw_vsock/virtio_transport.c
> > > > > index 073314312683..d4526ca462d2 100644
> > > > > --- a/net/vmw_vsock/virtio_transport.c
> > > > > +++ b/net/vmw_vsock/virtio_transport.c
> > > > > @@ -850,7 +850,7 @@ static int __init virtio_vsock_init(void)
> > > > >               return -ENOMEM;
> > > > >  
> > > > >       ret = vsock_core_register(&virtio_transport.transport,
> > > > > -                               VSOCK_TRANSPORT_F_G2H);
> > > > > +                               VSOCK_TRANSPORT_F_G2H |
> > > > > VSOCK_TRANSPORT_F_DGRAM);
> > > > >       if (ret)
> > > > >               goto out_wq;
> > > > >  
> > > > > diff --git a/net/vmw_vsock/virtio_transport_common.c
> > > > > b/net/vmw_vsock/virtio_transport_common.c
> > > > > index bdf16fff054f..aedb48728677 100644
> > > > > --- a/net/vmw_vsock/virtio_transport_common.c
> > > > > +++ b/net/vmw_vsock/virtio_transport_common.c
> > > > > @@ -229,7 +229,9 @@
> > > > > EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
> > > > >  
> > > > >  static u16 virtio_transport_get_type(struct sock *sk)
> > > > >  {
> > > > > -     if (sk->sk_type == SOCK_STREAM)
> > > > > +     if (sk->sk_type == SOCK_DGRAM)
> > > > > +             return VIRTIO_VSOCK_TYPE_DGRAM;
> > > > > +     else if (sk->sk_type == SOCK_STREAM)
> > > > >               return VIRTIO_VSOCK_TYPE_STREAM;
> > > > >       else
> > > > >               return VIRTIO_VSOCK_TYPE_SEQPACKET;
> > > > > @@ -287,22 +289,29 @@ static int
> > > > > virtio_transport_send_pkt_info(struct vsock_sock *vsk,
> > > > >       vvs = vsk->trans;
> > > > >  
> > > > >       /* we can send less than pkt_len bytes */
> > > > > -     if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
> > > > > -             pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> > > > > +     if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
> > > > > +             if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> > > > > +                     pkt_len =
> > > > > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
> > > > > +             else
> > > > > +                     return 0;
> > > > > +     }
> > > > >  
> > > > > -     /* virtio_transport_get_credit might return less than
> > > > > pkt_len credit */
> > > > > -     pkt_len = virtio_transport_get_credit(vvs, pkt_len);
> > > > > +     if (info->type != VIRTIO_VSOCK_TYPE_DGRAM) {
> > > > > +             /* virtio_transport_get_credit might return
> > > > > less than pkt_len credit */
> > > > > +             pkt_len = virtio_transport_get_credit(vvs,
> > > > > pkt_len);
> > > > >  
> > > > > -     /* Do not send zero length OP_RW pkt */
> > > > > -     if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
> > > > > -             return pkt_len;
> > > > > +             /* Do not send zero length OP_RW pkt */
> > > > > +             if (pkt_len == 0 && info->op ==
> > > > > VIRTIO_VSOCK_OP_RW)
> > > > > +                     return pkt_len;
> > > > > +     }
> > > > >  
> > > > >       skb = virtio_transport_alloc_skb(info, pkt_len,
> > > > >                                        src_cid, src_port,
> > > > >                                        dst_cid, dst_port,
> > > > >                                        &err);
> > > > >       if (!skb) {
> > > > > -             virtio_transport_put_credit(vvs, pkt_len);
> > > > > +             if (info->type != VIRTIO_VSOCK_TYPE_DGRAM)
> > > > > +                     virtio_transport_put_credit(vvs,
> > > > > pkt_len);
> > > > >               return err;
> > > > >       }
> > > > >  
> > > > > @@ -586,6 +595,61 @@
> > > > > virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
> > > > >  }
> > > > >  EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
> > > > >  
> > > > > +static ssize_t
> > > > > +virtio_transport_dgram_do_dequeue(struct vsock_sock *vsk,
> > > > > +                               struct msghdr *msg, size_t
> > > > > len)
> > > > > +{
> > > > > +     struct virtio_vsock_sock *vvs = vsk->trans;
> > > > > +     struct sk_buff *skb;
> > > > > +     size_t total = 0;
> > > > > +     u32 free_space;
> > > > > +     int err = -EFAULT;
> > > > > +
> > > > > +     spin_lock_bh(&vvs->rx_lock);
> > > > > +     if (total < len && !skb_queue_empty_lockless(&vvs-
> > > > > >rx_queue)) {
> > > > > +             skb = __skb_dequeue(&vvs->rx_queue);
> > > > > +
> > > > > +             total = len;
> > > > > +             if (total > skb->len - vsock_metadata(skb)-
> > > > > >off)
> > > > > +                     total = skb->len - vsock_metadata(skb)-
> > > > > >off;
> > > > > +             else if (total < skb->len -
> > > > > vsock_metadata(skb)->off)
> > > > > +                     msg->msg_flags |= MSG_TRUNC;
> > > > > +
> > > > > +             /* sk_lock is held by caller so no one else can
> > > > > dequeue.
> > > > > +              * Unlock rx_lock since memcpy_to_msg() may
> > > > > sleep.
> > > > > +              */
> > > > > +             spin_unlock_bh(&vvs->rx_lock);
> > > > > +
> > > > > +             err = memcpy_to_msg(msg, skb->data +
> > > > > vsock_metadata(skb)->off, total);
> > > > > +             if (err)
> > > > > +                     return err;
> > > > > +
> > > > > +             spin_lock_bh(&vvs->rx_lock);
> > > > > +
> > > > > +             virtio_transport_dec_rx_pkt(vvs, skb);
> > > > > +             consume_skb(skb);
> > > > > +     }
> > > > > +
> > > > > +     free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs-
> > > > > >last_fwd_cnt);
> > > > > +
> > > > > +     spin_unlock_bh(&vvs->rx_lock);
> > > > > +
> > > > > +     if (total > 0 && msg->msg_name) {
> > > > > +             /* Provide the address of the sender. */
> > > > > +             DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr,
> > > > > msg->msg_name);
> > > > > +
> > > > > +             vsock_addr_init(vm_addr,
> > > > > le64_to_cpu(vsock_hdr(skb)->src_cid),
> > > > > +                             le32_to_cpu(vsock_hdr(skb)-
> > > > > >src_port));
> > > > > +             msg->msg_namelen = sizeof(*vm_addr);
> > > > > +     }
> > > > > +     return total;
> > > > > +}
> > > > > +
> > > > > +static s64 virtio_transport_dgram_has_data(struct vsock_sock
> > > > > *vsk)
> > > > > +{
> > > > > +     return virtio_transport_stream_has_data(vsk);
> > > > > +}
> > > > > +
> > > > >  int
> > > > >  virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
> > > > >                                  struct msghdr *msg,
> > > > > @@ -611,7 +675,66 @@ virtio_transport_dgram_dequeue(struct
> > > > > vsock_sock *vsk,
> > > > >                              struct msghdr *msg,
> > > > >                              size_t len, int flags)
> > > > >  {
> > > > > -     return -EOPNOTSUPP;
> > > > > +     struct sock *sk;
> > > > > +     size_t err = 0;
> > > > > +     long timeout;
> > > > > +
> > > > > +     DEFINE_WAIT(wait);
> > > > > +
> > > > > +     sk = &vsk->sk;
> > > > > +     err = 0;
> > > > > +
> > > > > +     if (flags & MSG_OOB || flags & MSG_ERRQUEUE || flags &
> > > > > MSG_PEEK)
> > > > > +             return -EOPNOTSUPP;
> > > > > +
> > > > > +     lock_sock(sk);
> > > > > +
> > > > > +     if (!len)
> > > > > +             goto out;
> > > > > +
> > > > > +     timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
> > > > > +
> > > > > +     while (1) {
> > > > > +             s64 ready;
> > > > > +
> > > > > +             prepare_to_wait(sk_sleep(sk), &wait,
> > > > > TASK_INTERRUPTIBLE);
> > > > > +             ready = virtio_transport_dgram_has_data(vsk);
> > > > > +
> > > > > +             if (ready == 0) {
> > > > > +                     if (timeout == 0) {
> > > > > +                             err = -EAGAIN;
> > > > > +                             finish_wait(sk_sleep(sk),
> > > > > &wait);
> > > > > +                             break;
> > > > > +                     }
> > > > > +
> > > > > +                     release_sock(sk);
> > > > > +                     timeout = schedule_timeout(timeout);
> > > > > +                     lock_sock(sk);
> > > > > +
> > > > > +                     if (signal_pending(current)) {
> > > > > +                             err = sock_intr_errno(timeout);
> > > > > +                             finish_wait(sk_sleep(sk),
> > > > > &wait);
> > > > > +                             break;
> > > > > +                     } else if (timeout == 0) {
> > > > > +                             err = -EAGAIN;
> > > > > +                             finish_wait(sk_sleep(sk),
> > > > > &wait);
> > > > > +                             break;
> > > > > +                     }
> > > > > +             } else {
> > > > > +                     finish_wait(sk_sleep(sk), &wait);
> > > > > +
> > > > > +                     if (ready < 0) {
> > > > > +                             err = -ENOMEM;
> > > > > +                             goto out;
> > > > > +                     }
> > > > > +
> > > > > +                     err =
> > > > > virtio_transport_dgram_do_dequeue(vsk, msg, len);
> > > > > +                     break;
> > > > > +             }
> > > > > +     }
> > > > > +out:
> > > > > +     release_sock(sk);
> > > > > +     return err;
> > > > >  }
> > > > >  EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
> > ^^^
> > Maybe this generic data-waiting logic should live in af_vsock.c, as it
> > does for stream/seqpacket? That way, another transport that supports
> > SOCK_DGRAM could reuse it.
> 
> I think that is a great idea. I'll test that change for v2.
> 
> Thanks.

Also for v2, I tested your patchset a little (writing it all here so as
not to spread it over several mails):
1) The seqpacket test in vsock_test.c fails (seems to be a MSG_EOR flag issue).
2) I can't do rmmod with the following config (after testing):
   CONFIG_VSOCKETS=m
   CONFIG_VIRTIO_VSOCKETS=m
   CONFIG_VIRTIO_VSOCKETS_COMMON=m
   CONFIG_VHOST=m
   CONFIG_VHOST_VSOCK=m
   The guest is shut down, but rmmod fails.
3) virtio_transport_init and virtio_transport_exit seem to need
   EXPORT_SYMBOL_GPL(), because both are used in another module.
4) I tried to send a 5 KB piece of data (20 KB behaves the same), but got
   a kernel panic in the guest and, later, in the host.

Thank You
> 
> > > > >  
> > > > > @@ -819,13 +942,13 @@
> > > > > EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
> > > > >  int virtio_transport_dgram_bind(struct vsock_sock *vsk,
> > > > >                               struct sockaddr_vm *addr)
> > > > >  {
> > > > > -     return -EOPNOTSUPP;
> > > > > +     return vsock_bind_stream(vsk, addr);
> > > > >  }
> > > > >  EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);
> > > > >  
> > > > >  bool virtio_transport_dgram_allow(u32 cid, u32 port)
> > > > >  {
> > > > > -     return false;
> > > > > +     return true;
> > > > >  }
> > > > >  EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
> > > > >  
> > > > > @@ -861,7 +984,16 @@ virtio_transport_dgram_enqueue(struct
> > > > > vsock_sock *vsk,
> > > > >                              struct msghdr *msg,
> > > > >                              size_t dgram_len)
> > > > >  {
> > > > > -     return -EOPNOTSUPP;
> > > > > +     struct virtio_vsock_pkt_info info = {
> > > > > +             .op = VIRTIO_VSOCK_OP_RW,
> > > > > +             .msg = msg,
> > > > > +             .pkt_len = dgram_len,
> > > > > +             .vsk = vsk,
> > > > > +             .remote_cid = remote_addr->svm_cid,
> > > > > +             .remote_port = remote_addr->svm_port,
> > > > > +     };
> > > > > +
> > > > > +     return virtio_transport_send_pkt_info(vsk, &info);
> > > > >  }
> > > > >  EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
> > > > >  
> > > > > @@ -1165,6 +1297,12 @@ virtio_transport_recv_connected(struct
> > > > > sock *sk,
> > > > >       struct virtio_vsock_hdr *hdr = vsock_hdr(skb);
> > > > >       int err = 0;
> > > > >  
> > > > > +     if (le16_to_cpu(vsock_hdr(skb)->type) ==
> > > > > VIRTIO_VSOCK_TYPE_DGRAM) {
> > > > > +             virtio_transport_recv_enqueue(vsk, skb);
> > > > > +             sk->sk_data_ready(sk);
> > > > > +             return err;
> > > > > +     }
> > > > > +
> > > > >       switch (le16_to_cpu(hdr->op)) {
> > > > >       case VIRTIO_VSOCK_OP_RW:
> > > > >               virtio_transport_recv_enqueue(vsk, skb);
> > > > > @@ -1320,7 +1458,8 @@ virtio_transport_recv_listen(struct
> > > > > sock *sk, struct sk_buff *skb,
> > > > >  static bool virtio_transport_valid_type(u16 type)
> > > > >  {
> > > > >       return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
> > > > > -            (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
> > > > > +            (type == VIRTIO_VSOCK_TYPE_SEQPACKET) ||
> > > > > +            (type == VIRTIO_VSOCK_TYPE_DGRAM);
> > > > >  }
> > > > >  
> > > > >  /* We are under the virtio-vsock's vsock->rx_lock or vhost-
> > > > > vsock's vq->mutex
> > > > > @@ -1384,6 +1523,11 @@ void virtio_transport_recv_pkt(struct
> > > > > virtio_transport *t,
> > > > >               goto free_pkt;
> > > > >       }
> > > > >  
> > > > > +     if (sk->sk_type == SOCK_DGRAM) {
> > > > > +             virtio_transport_recv_connected(sk, skb);
> > > > > +             goto out;
> > > > > +     }
> > > > > +
> > > > >       space_available = virtio_transport_space_update(sk,
> > > > > skb);
> > > > >  
> > > > >       /* Update CID in case it has changed after a transport
> > > > > reset event */
> > > > > @@ -1415,6 +1559,7 @@ void virtio_transport_recv_pkt(struct
> > > > > virtio_transport *t,
> > > > >               break;
> > > > >       }
> > > > >  
> > > > > +out:
> > > > >       release_sock(sk);
> > > > >  
> > > > >       /* Release refcnt obtained when we fetched this socket
> > > > > out of the
> > > > > -- 
> > > > > 2.35.1
> > > > > 
> > > > 
> > > > -------------------------------------------------------------
> > > > --------
> > > > To unsubscribe, e-mail: 
> > > > [email protected]
> > > > For additional commands, e-mail: 
> > > > [email protected]
> > > > 
> 
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: [email protected]
> For additional commands, e-mail: [email protected]
> 

Reply via email to