The smallest reconnect timeout that can be set through QAPI is 1 second. But for a system where reconnecting is considered a regular situation and the backend is guaranteed to start within tens of milliseconds, this 1-second timeout becomes unnecessary throttling.
On the other hand, just allowing a smaller timeout would be inefficient when an unplanned disconnect lasts for a longer time. Let's do a simple thing: without any change to the API, start reconnecting with a small constant timeout (5 ms) and increase it exponentially up to the user-given value. Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@yandex-team.ru> --- Hi all! We faced overly long (1 second) pauses in IO during reconnect to the vhost server, and this 1 second is exactly the reconnect=1 parameter of char-socket. Here is our solution to the problem. chardev/char-socket.c | 21 ++++++++++++++++----- include/chardev/char-socket.h | 5 ++++- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/chardev/char-socket.c b/chardev/char-socket.c index 1ca9441b1b..4a35f75ebe 100644 --- a/chardev/char-socket.c +++ b/chardev/char-socket.c @@ -74,9 +74,10 @@ static void qemu_chr_socket_restart_timer(Chardev *chr) assert(!s->reconnect_timer); name = g_strdup_printf("chardev-socket-reconnect-%s", chr->label); s->reconnect_timer = qemu_chr_timeout_add_ms(chr, - s->reconnect_time * 1000, + s->reconnect_ms, socket_reconnect_timeout, chr); + s->reconnect_ms = MIN(s->reconnect_ms * 2, s->reconnect_ms_max); g_source_set_name(s->reconnect_timer, name); g_free(name); } @@ -481,7 +482,7 @@ static void tcp_chr_disconnect_locked(Chardev *chr) if (emit_close) { qemu_chr_be_event(chr, CHR_EVENT_CLOSED); } - if (s->reconnect_time && !s->reconnect_timer) { + if (s->reconnect_ms && !s->reconnect_timer) { qemu_chr_socket_restart_timer(chr); } } @@ -1080,9 +1081,11 @@ static int tcp_chr_wait_connected(Chardev *chr, Error **errp) } else { Error *err = NULL; if (tcp_chr_connect_client_sync(chr, &err) < 0) { - if (s->reconnect_time) { + if (s->reconnect_ms) { error_free(err); - g_usleep(s->reconnect_time * 1000ULL * 1000ULL); + g_usleep(s->reconnect_ms * 1000ULL); + s->reconnect_ms = MIN(s->reconnect_ms * 2, + s->reconnect_ms_max); } else { error_propagate(errp, err); return -1; @@ -1091,6 
+1094,10 @@ static int tcp_chr_wait_connected(Chardev *chr, Error **errp) } } + if (s->reconnect_ms) { + s->reconnect_ms = SOCKET_CHARDEV_RECONNECT_MS_MIN; + } + return 0; } @@ -1147,6 +1154,9 @@ static void qemu_chr_socket_connected(QIOTask *task, void *opaque) goto cleanup; } + if (s->reconnect_ms) { + s->reconnect_ms = SOCKET_CHARDEV_RECONNECT_MS_MIN; + } s->connect_err_reported = false; tcp_chr_new_client(chr, sioc); @@ -1273,7 +1283,8 @@ static int qmp_chardev_open_socket_client(Chardev *chr, SocketChardev *s = SOCKET_CHARDEV(chr); if (reconnect > 0) { - s->reconnect_time = reconnect; + s->reconnect_ms_max = reconnect * 1000; + s->reconnect_ms = SOCKET_CHARDEV_RECONNECT_MS_MIN; tcp_chr_connect_client_async(chr); return 0; } else { diff --git a/include/chardev/char-socket.h b/include/chardev/char-socket.h index 0708ca6fa9..f464c4c1c7 100644 --- a/include/chardev/char-socket.h +++ b/include/chardev/char-socket.h @@ -33,6 +33,8 @@ #define TCP_MAX_FDS 16 +#define SOCKET_CHARDEV_RECONNECT_MS_MIN 5 + typedef struct { char buf[21]; size_t buflen; @@ -74,7 +76,8 @@ struct SocketChardev { bool is_websock; GSource *reconnect_timer; - int64_t reconnect_time; + int64_t reconnect_ms_max; + int64_t reconnect_ms; bool connect_err_reported; QIOTask *connect_task; -- 2.34.1