If something abnormal happens to QEMU, 'connect()' can block the calling
thread (e.g. the main thread of OVS) forever or for a really long time.
This can break the whole application or block the reconnection thread.

Example with OVS:

        ovs_rcu(urcu2)|WARN|blocked 512000 ms waiting for main to quiesce
        (gdb) bt
        #0  connect () from /lib64/libpthread.so.0
        #1  vhost_user_create_client (vsocket=0xa816e0)
        #2  rte_vhost_driver_register
        #3  netdev_dpdk_vhost_user_construct
        #4  netdev_open (name=0xa664b0 "vhost1")
        [...]
        #11 main

Fix that by putting client sockets into non-blocking mode while connecting.

Fixes: 64ab701c3d1e ("vhost: add vhost-user client mode")

Signed-off-by: Ilya Maximets <i.maximets at samsung.com>
---
This was reproduced with current QEMU master branch
(commit 1ecfb24da987b862f) + patch-set "vhost-user reconnect fixes"
(https://lists.nongnu.org/archive/html/qemu-devel/2016-07/msg01547.html).

OVS was patched to support client mode:
http://openvswitch.org/pipermail/dev/2016-July/074972.html

The following script forces QEMU to fail to initialize vhost because the
disconnection occurs while the device is not fully configured:

        while true
        do
                ovs-vsctl set Interface vhost1 ofport_request=125
                ovs-vsctl set Interface vhost1 ofport_request=126
        done

As a result: QEMU still works, the network interface is broken and the
             OVS main thread is stalled inside 'connect()'.

Version 2:
        * EINPROGRESS is not checked. EISCONN is checked instead on
          the next iteration of the reconnection loop.

 lib/librte_vhost/vhost_user/vhost-net-user.c | 62 ++++++++++++++++++++++++++--
 1 file changed, 58 insertions(+), 4 deletions(-)

diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
index 8c6a096..63e0840 100644
--- a/lib/librte_vhost/vhost_user/vhost-net-user.c
+++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
@@ -43,6 +43,7 @@
 #include <sys/un.h>
 #include <sys/queue.h>
 #include <errno.h>
+#include <fcntl.h>
 #include <pthread.h>

 #include <rte_log.h>
@@ -449,6 +450,14 @@ create_unix_socket(const char *path, struct sockaddr_un *un, bool is_server)
        RTE_LOG(INFO, VHOST_CONFIG, "vhost-user %s: socket created, fd: %d\n",
                is_server ? "server" : "client", fd);

+       if (!is_server && fcntl(fd, F_SETFL, O_NONBLOCK)) {
+               RTE_LOG(ERR, VHOST_CONFIG,
+                       "vhost-user: can't set nonblocking mode for socket, fd: "
+                       "%d (%s)\n", fd, strerror(errno));
+               close(fd);
+               return -1;
+       }
+
        memset(un, 0, sizeof(*un));
        un->sun_family = AF_UNIX;
        strncpy(un->sun_path, path, sizeof(un->sun_path));
@@ -516,9 +525,43 @@ struct vhost_user_reconnect_list {
 static struct vhost_user_reconnect_list reconn_list;
 static pthread_t reconn_tid;

+static int
+vhost_user_connect_nonblock(int fd, struct sockaddr *un, size_t sz)
+{
+       int ret, flags, so_error;
+       socklen_t len = sizeof(so_error);
+
+       errno = EINVAL;
+
+       ret = connect(fd, un, sz);
+       if (ret < 0 && errno != EISCONN)
+               return -1;
+
+       ret = getsockopt(fd, SOL_SOCKET, SO_ERROR, &so_error, &len);
+       if (ret < 0 || so_error) {
+               if (!ret)
+                       errno = so_error;
+               return -1;
+       }
+
+       flags = fcntl(fd, F_GETFL, 0);
+       if (flags < 0) {
+               RTE_LOG(ERR, VHOST_CONFIG,
+                       "can't get flags for connfd %d\n", fd);
+               return -2;
+       }
+       if ((flags & O_NONBLOCK) && fcntl(fd, F_SETFL, flags & ~O_NONBLOCK)) {
+               RTE_LOG(ERR, VHOST_CONFIG,
+                               "can't disable nonblocking on fd %d\n", fd);
+               return -2;
+       }
+       return 0;
+}
+
 static void *
 vhost_user_client_reconnect(void *arg __rte_unused)
 {
+       int ret;
        struct vhost_user_reconnect *reconn, *next;

        while (1) {
@@ -532,13 +575,23 @@ vhost_user_client_reconnect(void *arg __rte_unused)
                     reconn != NULL; reconn = next) {
                        next = TAILQ_NEXT(reconn, next);

-                       if (connect(reconn->fd, (struct sockaddr *)&reconn->un,
-                                   sizeof(reconn->un)) < 0)
+                       ret = vhost_user_connect_nonblock(reconn->fd,
+                                               (struct sockaddr *)&reconn->un,
+                                               sizeof(reconn->un));
+                       if (ret == -2) {
+                               close(reconn->fd);
+                               RTE_LOG(ERR, VHOST_CONFIG,
+                                       "reconnection for fd %d failed\n",
+                                       reconn->fd);
+                               goto remove_fd;
+                       }
+                       if (ret == -1)
                                continue;

                        RTE_LOG(INFO, VHOST_CONFIG,
                                "%s: connected\n", reconn->vsocket->path);
                        vhost_user_add_connection(reconn->fd, reconn->vsocket);
+remove_fd:
                        TAILQ_REMOVE(&reconn_list.head, reconn, next);
                        free(reconn);
                }
@@ -579,7 +632,8 @@ vhost_user_create_client(struct vhost_user_socket *vsocket)
        if (fd < 0)
                return -1;

-       ret = connect(fd, (struct sockaddr *)&un, sizeof(un));
+       ret = vhost_user_connect_nonblock(fd, (struct sockaddr *)&un,
+                                         sizeof(un));
        if (ret == 0) {
                vhost_user_add_connection(fd, vsocket);
                return 0;
@@ -589,7 +643,7 @@ vhost_user_create_client(struct vhost_user_socket *vsocket)
                "failed to connect to %s: %s\n",
                path, strerror(errno));

-       if (!vsocket->reconnect) {
+       if (ret == -2 || !vsocket->reconnect) {
                close(fd);
                return -1;
        }
-- 
2.7.4

Reply via email to