09.06.2018 18:32, Vladimir Sementsov-Ogievskiy wrote:
Implement reconnect. To achieve this:1. Move from quit bool variable to state. 4 states are introduced: connecting-wait: means, that reconnecting is in progress, and there were small number of reconnect attempts, so all requests are waiting for the connection. connecting-nowait: reconnecting is in progress, there were a lot of attempts of reconnect, all requests will return errors. connected: normal state quit: exiting after fatal error or on close Possible transitions are: * -> quit connecting-* -> connected connecting-wait -> connecting-nowait connected -> connecting-wait 2. Implement reconnect in connection_co. So, in connecting-* mode, connection_co, tries to reconnect every NBD_RECONNECT_NS. Configuring of this parameter (as well as NBD_RECONNECT_ATTEMPTS, which specifies bound of transition from connecting-wait to connecting-nowait) may be done as a follow-up patch. 3. Retry nbd queries on channel error, if we are in connecting-wait state. 4. In init, wait until for connection until transition to connecting-nowait. So, NBD_RECONNECT_ATTEMPTS is a bound of fail for initial connection too. Signed-off-by: Vladimir Sementsov-Ogievskiy <[email protected]> --- block/nbd-client.h | 2 + block/nbd-client.c | 170 ++++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 123 insertions(+), 49 deletions(-) diff --git a/block/nbd-client.h b/block/nbd-client.h index 2561e1ea42..1249f2eb52 100644 --- a/block/nbd-client.h +++ b/block/nbd-client.h @@ -44,6 +44,8 @@ typedef struct NBDClientSession { bool receiving; int connect_status; Error *connect_err; + int connect_attempts; + bool wait_in_flight;NBDClientRequest requests[MAX_NBD_REQUESTS];NBDReply reply; diff --git a/block/nbd-client.c b/block/nbd-client.c index f22ed7f404..49b1f67047 100644 --- a/block/nbd-client.c +++ b/block/nbd-client.c @@ -41,10 +41,16 @@ static int nbd_client_connect(BlockDriverState *bs, const char *hostname, Error **errp);-/* @ret would be used for reconnect in future */static void nbd_channel_error(NBDClientSession *s, int ret) { - s->state = NBD_CLIENT_QUIT; + if (ret == -EIO) { + if (s->state == NBD_CLIENT_CONNECTED) { + s->state = NBD_CLIENT_CONNECTING_WAIT; + s->connect_attempts = 0; + } + } else { + s->state = NBD_CLIENT_QUIT; + } }static void nbd_recv_coroutines_wake_all(NBDClientSession *s)@@ -90,6 +96,19 @@ typedef struct NBDConnection { uint64_t reconnect_timeout; } NBDConnection;+static bool nbd_client_connecting(NBDClientSession *client)+{ + return client->state == NBD_CLIENT_CONNECTING_WAIT || + client->state == NBD_CLIENT_CONNECTING_NOWAIT || + client->state == NBD_CLIENT_CONNECTING_INIT; +} + +static bool nbd_client_connecting_wait(NBDClientSession *client) +{ + return client->state == NBD_CLIENT_CONNECTING_WAIT || + client->state == NBD_CLIENT_CONNECTING_INIT; +} + static coroutine_fn void nbd_connection_entry(void *opaque) { NBDConnection *con = opaque; @@ -98,26 +117,55 @@ static coroutine_fn void nbd_connection_entry(void *opaque) int ret = 0; Error *local_err = NULL;- if (con->reconnect_attempts != 0) {- error_setg(&s->connect_err, "Reconnect is not supported yet"); - s->connect_status = -EINVAL; - nbd_channel_error(s, s->connect_status); - return; - } + while (s->state != NBD_CLIENT_QUIT) { + assert(s->reply.handle == 0);- s->connect_status = nbd_client_connect(con->bs, con->saddr,- con->export, con->tlscreds, - con->hostname, &s->connect_err); - if (s->connect_status < 0) { - nbd_channel_error(s, s->connect_status); - return; - } + if (nbd_client_connecting(s)) { + if (s->connect_attempts == con->reconnect_attempts) { + s->state = NBD_CLIENT_CONNECTING_NOWAIT; + qemu_co_queue_restart_all(&s->free_sema); + }- /* successfully connected */- s->state = NBD_CLIENT_CONNECTED; + qemu_co_mutex_lock(&s->send_mutex); + + while (s->in_flight > 0) { + qemu_co_mutex_unlock(&s->send_mutex); + nbd_recv_coroutines_wake_all(s); + s->wait_in_flight = true; + qemu_coroutine_yield(); + s->wait_in_flight = false; + qemu_co_mutex_lock(&s->send_mutex); + } + + qemu_co_mutex_unlock(&s->send_mutex); + + /* Now we are sure, that nobody accessing the channel now and nobody + * will try to access the channel, until we set state to CONNECTED + */ + + s->connect_status = nbd_client_connect(con->bs, con->saddr, + con->export, con->tlscreds, + con->hostname, &local_err);
previous s->ioc leaked here. closing previous connection needs more actions
+ s->connect_attempts++; + error_free(s->connect_err); + s->connect_err = NULL; + error_propagate(&s->connect_err, local_err); + local_err = NULL; + if (s->connect_status == -EINVAL) { + /* Protocol error or something like this */ + nbd_channel_error(s, s->connect_status); + continue; + } + if (s->connect_status < 0) { + qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, con->reconnect_timeout); + continue; + } + + /* successfully connected */ + s->state = NBD_CLIENT_CONNECTED; + qemu_co_queue_restart_all(&s->free_sema); + }- while (s->state != NBD_CLIENT_QUIT) {- assert(s->reply.handle == 0); s->receiving = true; ret = nbd_receive_reply(s->ioc, &s->reply, &local_err); s->receiving = false; @@ -158,6 +206,7 @@ stat
-- Best regards, Vladimir
