On Tue, Jan 14, 2014 at 12:10 PM, Michael S. Tsirkin <m...@redhat.com> wrote:

> On Mon, Jan 13, 2014 at 03:25:16PM +0100, Antonios Motakis wrote:
> > Add structures for passing vhost-user messages over a unix domain socket.
> > This is the equivalent of the existing vhost-kernel ioctls.
> >
> > Connect to the named unix domain socket. The system call sendmsg
> > is used for communication. To be able to pass file descriptors
> > between processes - we use SCM_RIGHTS type in the message control header.
> >
> > Signed-off-by: Antonios Motakis <a.mota...@virtualopensystems.com>
> > Signed-off-by: Nikolay Nikolaev <n.nikol...@virtualopensystems.com>
>
> Not all comments in v5 review of this file have been
> addressed.
> I think if a comment was wrong it's a good idea to
> add clarification in code on why it is, so future readers
> aren't confused.
>
> In particular I think always explicitly opening
> domain sockets and forcing a specific client/server
> model is a problem.
>
>
I think we need to clarify how the model would work when using QEMU as the
server. As a client it is clear, because it parallels what vhost does when
used with the kernel, however as a server it is not completely clear what
the use case is and how it would work.


> I think we also want the protocol documented in docs/
> so future users don't have to read the code to connect
> to qemu.
>
> Can you add a unit test for this code?
>
> We ship pxe ROM that enables virtio net,
> so you can write a small stub that works as the
> remote side of this protocol, poke at
> guest memory to see that it has
> the expected content (compare to what we get with
> readl).
>
> In theory you could also get and put some buffers
> though that's not a must.
>
>
> > ---
> >  hw/virtio/vhost-backend.c | 306
> +++++++++++++++++++++++++++++++++++++++++++++-
> >  1 file changed, 301 insertions(+), 5 deletions(-)
> >
> > diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
> > index 1d83e1d..460ee02 100644
> > --- a/hw/virtio/vhost-backend.c
> > +++ b/hw/virtio/vhost-backend.c
> > @@ -11,34 +11,330 @@
> >  #include "hw/virtio/vhost.h"
> >  #include "hw/virtio/vhost-backend.h"
> >  #include "qemu/error-report.h"
> > +#include "qemu/sockets.h"
> >
> >  #include <fcntl.h>
> >  #include <unistd.h>
> >  #include <sys/ioctl.h>
> > +#include <sys/socket.h>
> > +#include <sys/un.h>
> > +#include <linux/vhost.h>
> > +
> > +#define VHOST_MEMORY_MAX_NREGIONS    8
> > +#define VHOST_USER_SOCKTO            (300) /* msec */
> > +
> > +typedef enum VhostUserRequest {
> > +    VHOST_USER_NONE = 0,
> > +    VHOST_USER_GET_FEATURES = 1,
> > +    VHOST_USER_SET_FEATURES = 2,
> > +    VHOST_USER_SET_OWNER = 3,
> > +    VHOST_USER_RESET_OWNER = 4,
> > +    VHOST_USER_SET_MEM_TABLE = 5,
> > +    VHOST_USER_SET_LOG_BASE = 6,
> > +    VHOST_USER_SET_LOG_FD = 7,
> > +    VHOST_USER_SET_VRING_NUM = 8,
> > +    VHOST_USER_SET_VRING_ADDR = 9,
> > +    VHOST_USER_SET_VRING_BASE = 10,
> > +    VHOST_USER_GET_VRING_BASE = 11,
> > +    VHOST_USER_SET_VRING_KICK = 12,
> > +    VHOST_USER_SET_VRING_CALL = 13,
> > +    VHOST_USER_SET_VRING_ERR = 14,
> > +    VHOST_USER_NET_SET_BACKEND = 15,
> > +    VHOST_USER_ECHO = 16,
> > +    VHOST_USER_MAX
> > +} VhostUserRequest;
> > +
> > +typedef struct VhostUserMemoryRegion {
> > +    uint64_t guest_phys_addr;
> > +    uint64_t memory_size;
> > +    uint64_t userspace_addr;
> > +} VhostUserMemoryRegion;
> > +
> > +typedef struct VhostUserMemory {
> > +    uint32_t nregions;
> > +    uint32_t padding;
> > +    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> > +} VhostUserMemory;
> > +
> > +typedef struct VhostUserMsg {
> > +    VhostUserRequest request;
> > +
> > +#define VHOST_USER_VERSION_MASK     (0x3)
> > +#define VHOST_USER_REPLY_MASK       (0x1<<2)
> > +    uint32_t flags;
> > +    uint32_t size; /* the following payload size */
> > +    union {
> > +        uint64_t u64;
> > +        struct vhost_vring_state state;
> > +        struct vhost_vring_addr addr;
> > +        VhostUserMemory memory;
> > +    };
> > +} QEMU_PACKED VhostUserMsg;
> > +
> > +static VhostUserMsg m __attribute__ ((unused));
> > +#define VHOST_USER_HDR_SIZE (sizeof(m.request) \
> > +                            + sizeof(m.flags) \
> > +                            + sizeof(m.size))
> > +
> > +#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE)
> > +
> > +/* The version of the protocol we support */
> > +#define VHOST_USER_VERSION    (0x1)
> > +
> > +static int vhost_user_recv(int fd, VhostUserMsg *msg)
> > +{
> > +    ssize_t r;
> > +    uint8_t *p = (uint8_t *) msg;
> > +
> > +    /* read the header */
> > +    do {
> > +        r = read(fd, p, VHOST_USER_HDR_SIZE);
> > +    } while (r < 0 && errno == EINTR);
> > +
> > +    if (r < 0) {
> > +        error_report("Failed to read msg header, reason: %s\n",
> > +                     strerror(errno));
> > +        goto fail;
> > +    }
> > +
> > +    if (r != VHOST_USER_HDR_SIZE) {
> > +        error_report("Failed to read msg header. Read %zu instead of
> %zu.\n",
> > +                     r, VHOST_USER_HDR_SIZE);
> > +        goto fail;
> > +    }
> > +
> > +    /* validate received flags */
> > +    if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
> > +        error_report("Failed to read msg header."
> > +                     " Flags 0x%x instead of 0x%x.\n",
> > +                     msg->flags, VHOST_USER_REPLY_MASK |
> VHOST_USER_VERSION);
> > +        goto fail;
> > +    }
> > +
> > +    /* validate message size is sane */
> > +    if (msg->size > VHOST_USER_PAYLOAD_SIZE) {
> > +        error_report("Failed to read msg header."
> > +                     " Size %d exceeds the maximum %zu.\n",
> > +                     msg->size, VHOST_USER_PAYLOAD_SIZE);
> > +        goto fail;
> > +    }
> > +
> > +    p += VHOST_USER_HDR_SIZE;
> > +
> > +    /* read the payload */
> > +    do {
> > +        r = read(fd, p, msg->size);
> > +    } while (r < 0 && errno == EINTR);
> > +
> > +    if (r < 0) {
> > +        error_report("Failed to read msg payload, reason: %s\n",
> > +                     strerror(errno));
> > +        goto fail;
> > +    }
> > +
> > +    if (r != msg->size) {
> > +        error_report("Failed to read msg payload. Read %zu instead of
> %d.\n",
> > +                     r, msg->size);
> > +        goto fail;
> > +    }
> > +
> > +    return 0;
> > +
> > +fail:
> > +    return -1;
> > +}
> > +
> > +static int vhost_user_send_fds(int fd, VhostUserMsg *msg, int *fds,
> > +        size_t fd_num)
> > +{
> > +    int r;
> > +
> > +    struct msghdr msgh;
> > +    struct iovec iov;
> > +
> > +    size_t fd_size = fd_num * sizeof(int);
> > +    char control[CMSG_SPACE(fd_size)];
> > +    struct cmsghdr *cmsg;
> > +
> > +    memset(&msgh, 0, sizeof(msgh));
> > +    memset(control, 0, sizeof(control));
> > +
> > +    /* set the payload */
> > +    iov.iov_base = msg;
> > +    iov.iov_len = VHOST_USER_HDR_SIZE + msg->size;
> > +
> > +    msgh.msg_iov = &iov;
> > +    msgh.msg_iovlen = 1;
> > +
> > +    if (fd_num) {
> > +        msgh.msg_control = control;
> > +        msgh.msg_controllen = sizeof(control);
> > +
> > +        cmsg = CMSG_FIRSTHDR(&msgh);
> > +
> > +        cmsg->cmsg_len = CMSG_LEN(fd_size);
> > +        cmsg->cmsg_level = SOL_SOCKET;
> > +        cmsg->cmsg_type = SCM_RIGHTS;
> > +        memcpy(CMSG_DATA(cmsg), fds, fd_size);
> > +    } else {
> > +        msgh.msg_control = 0;
> > +        msgh.msg_controllen = 0;
> > +    }
> > +
> > +    do {
> > +        r = sendmsg(fd, &msgh, 0);
> > +    } while (r < 0 && errno == EINTR);
> > +
> > +    if (r < 0) {
> > +        error_report("Failed to send msg(%d), reason: %s\n",
> > +                msg->request, strerror(errno));
> > +        goto fail;
> > +    }
> > +
> > +    if (r != VHOST_USER_HDR_SIZE + msg->size) {
> > +        error_report("Failed to send msg(%d). Sent %d instead of
> %zu.\n",
> > +                msg->request, r, VHOST_USER_HDR_SIZE + msg->size);
> > +        goto fail;
> > +    }
> > +
> > +    return 0;
> > +
> > +fail:
> > +    return -1;
> > +}
> > +
> > +static int vhost_user_echo(struct vhost_dev *dev)
> > +{
> > +    VhostUserMsg msg;
> > +    int fd = dev->control;
> > +
> > +    if (fd < 0) {
> > +        return 0;
> > +    }
> > +
> > +    /* check connection */
> > +    msg.request = VHOST_USER_ECHO;
> > +    msg.flags = VHOST_USER_VERSION;
> > +    msg.size = 0;
> > +
> > +    if (vhost_user_send_fds(fd, &msg, 0, 0) < 0) {
> > +        error_report("ECHO failed\n");
> > +        return -1;
> > +    }
> > +
> > +    if (vhost_user_recv(fd, &msg) < 0) {
> > +        error_report("ECHO failed\n");
> > +        return -1;
> > +    }
> > +
> > +    return 0;
> > +}
> >
> >  static int vhost_user_call(struct vhost_dev *dev, unsigned long int
> request,
> >          void *arg)
> >  {
> > +    int fd = dev->control;
> > +    VhostUserMsg msg;
> > +    int result = 0;
> > +    int fds[VHOST_MEMORY_MAX_NREGIONS];
> > +    size_t fd_num = 0;
> > +
> >      assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
> > -    error_report("vhost_user_call not implemented\n");
> >
> > -    return -1;
> > +    if (fd < 0) {
> > +        return 0;
> > +    }
> > +
> > +    msg.request = VHOST_USER_NONE;
> > +    msg.flags = VHOST_USER_VERSION;
> > +    msg.size = 0;
> > +
> > +    switch (request) {
> > +    default:
> > +        error_report("vhost-user trying to send unhandled ioctl\n");
> > +        return -1;
> > +        break;
> > +    }
> > +
> > +    result = vhost_user_send_fds(fd, &msg, fds, fd_num);
> > +
> > +    return result;
> >  }
> >
> >  static int vhost_user_init(struct vhost_dev *dev, const char *devpath)
> >  {
> > +    int fd = -1;
> > +    struct sockaddr_un un;
> > +    struct timeval tv;
> > +    size_t len;
> > +
> >      assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
> > -    error_report("vhost_user_init not implemented\n");
> >
> > +    /* Create the socket */
> > +    fd = socket(AF_UNIX, SOCK_STREAM, 0);
> > +    if (fd == -1) {
> > +        error_report("socket: %s", strerror(errno));
> > +        return -1;
> > +    }
> > +
> > +    /* Set socket options */
> > +    tv.tv_sec = VHOST_USER_SOCKTO / 1000;
> > +    tv.tv_usec = (VHOST_USER_SOCKTO % 1000) * 1000;
> > +
> > +    if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(struct
> timeval))
> > +            == -1) {
> > +        error_report("setsockopt SO_SNDTIMEO: %s", strerror(errno));
> > +        goto fail;
> > +    }
> > +
> > +    if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(struct
> timeval))
> > +            == -1) {
> > +        error_report("setsockopt SO_RCVTIMEO: %s", strerror(errno));
> > +        goto fail;
> > +    }
> > +
> > +    /* Connect */
> > +    un.sun_family = AF_UNIX;
> > +    strcpy(un.sun_path, devpath);
> > +
> > +    len = sizeof(un.sun_family) + strlen(devpath);
> > +
> > +    if (connect(fd, (struct sockaddr *) &un, len) == -1) {
> > +        error_report("connect: %s", strerror(errno));
> > +        goto fail;
> > +    }
> > +
> > +    /* Cleanup if there is previous connection left */
> > +    if (dev->control >= 0) {
> > +        dev->vhost_ops->vhost_backend_cleanup(dev);
> > +    }
> > +    dev->control = fd;
> > +
> > +    if (vhost_user_echo(dev) < 0) {
> > +        dev->control = -1;
> > +        goto fail;
> > +    }
> > +
> > +    return fd;
> > +
> > +fail:
> > +    close(fd);
> >      return -1;
> > +
> >  }
> >
> >  static int vhost_user_cleanup(struct vhost_dev *dev)
> >  {
> > +    int r = 0;
> > +
> >      assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
> > -    error_report("vhost_user_cleanup not implemented\n");
> >
> > -    return -1;
> > +    if (dev->control >= 0) {
> > +        r = close(dev->control);
> > +    }
> > +    dev->control = -1;
> > +
> > +    return r;
> >  }
> >
> >  static const VhostOps user_ops = {
> > --
> > 1.8.3.2
> >
>

Reply via email to