Re: [Qemu-devel] [PATCH v8 2/3] block/nbd: nbd reconnect

2019-08-22 Thread Vladimir Sementsov-Ogievskiy
21.08.2019 20:35, Eric Blake wrote:
> On 8/21/19 11:52 AM, Vladimir Sementsov-Ogievskiy wrote:
>> Implement reconnect. To achieve this:
>>
>> 1. add new modes:
>> connecting-wait: reconnecting is in progress, and only a small number
>>   of reconnect attempts have been made, so all requests wait for the
>>   connection.
>> connecting-nowait: reconnecting is in progress, there have already been
>>   many reconnect attempts, so all requests return errors.
>>
>> two old modes are used too:
>> connected: normal state
>> quit: exiting after fatal error or on close
>>
>> Possible transitions are:
>>
>> * -> quit
>> connecting-* -> connected
>> connecting-wait -> connecting-nowait (transition is done after
>>reconnect-delay seconds in connecting-wait mode)
>> connected -> connecting-wait
>>
>> 2. Implement reconnect in connection_co. So, in connecting-* mode,
>>  connection_co tries to reconnect an unlimited number of times.
>>
>> 3. Retry nbd queries on channel error if we are in the connecting-wait
>>  state.
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy 
>> ---
> 
>> +static bool nbd_client_connecting(BDRVNBDState *s)
>> +{
>> +return s->state == NBD_CLIENT_CONNECTING_WAIT ||
>> +s->state == NBD_CLIENT_CONNECTING_NOWAIT;
> 
> 
> Indentation looks unusual. I might have done:
> 
>  return (s->state == NBD_CLIENT_CONNECTING_WAIT ||
>          s->state == NBD_CLIENT_CONNECTING_NOWAIT);
> 
> Or even exploit the enum encoding:
> 
>  return s->state <= NBD_CLIENT_CONNECTING_NOWAIT
> 
> Is s->state updated atomically, or do we risk the case where we might
> see two different values of s->state across the || sequence point?  Does
> that matter?

I hope it all happens in one aio context, so a state change should not
interleave with this function, as it doesn't yield.
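
For illustration, here is a minimal standalone sketch (simplified types, not
the patch code itself) of reading the state once, so that only a single load
of s->state is involved:

#include <stdbool.h>

typedef enum NBDClientState {
    NBD_CLIENT_CONNECTING_WAIT,
    NBD_CLIENT_CONNECTING_NOWAIT,
    NBD_CLIENT_CONNECTED,
    NBD_CLIENT_QUIT
} NBDClientState;

typedef struct BDRVNBDState {
    NBDClientState state;
    /* the real struct has many more fields; omitted in this sketch */
} BDRVNBDState;

static bool nbd_client_connecting(BDRVNBDState *s)
{
    /* one load of s->state, and no yield point before the comparisons */
    NBDClientState st = s->state;

    return st == NBD_CLIENT_CONNECTING_WAIT ||
           st == NBD_CLIENT_CONNECTING_NOWAIT;
}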

> 
>> +}
>> +
>> +static bool nbd_client_connecting_wait(BDRVNBDState *s)
>> +{
>> +return s->state == NBD_CLIENT_CONNECTING_WAIT;
>> +}
>> +
>> +static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s)
>> +{
>> +Error *local_err = NULL;
>> +
>> +if (!nbd_client_connecting(s)) {
>> +return;
>> +}
>> +assert(nbd_client_connecting(s));
> 
> This assert adds nothing given the condition beforehand.
> 
>> +
>> +/* Wait for completion of all in-flight requests */
>> +
>> +qemu_co_mutex_lock(&s->send_mutex);
>> +
>> +while (s->in_flight > 0) {
>> +qemu_co_mutex_unlock(&s->send_mutex);
>> +nbd_recv_coroutines_wake_all(s);
>> +s->wait_in_flight = true;
>> +qemu_coroutine_yield();
>> +s->wait_in_flight = false;
>> +qemu_co_mutex_lock(&s->send_mutex);
>> +}
>> +
>> +qemu_co_mutex_unlock(&s->send_mutex);
>> +
>> +if (!nbd_client_connecting(s)) {
>> +return;
>> +}
>> +
>> +/*
>> + * Now we are sure that nobody is accessing the channel, and no one will
>> + * try until we set the state to CONNECTED.
>> + */
>> +
>> +/* Finalize previous connection if any */
>> +if (s->ioc) {
>> +nbd_client_detach_aio_context(s->bs);
>> +object_unref(OBJECT(s->sioc));
>> +s->sioc = NULL;
>> +object_unref(OBJECT(s->ioc));
>> +s->ioc = NULL;
>> +}
>> +
>> +s->connect_status = nbd_client_connect(s->bs, &local_err);
>> +error_free(s->connect_err);
>> +s->connect_err = NULL;
>> +error_propagate(&s->connect_err, local_err);
>> +local_err = NULL;
>> +
>> +if (s->connect_status < 0) {
>> +/* failed attempt */
>> +return;
>> +}
>> +
>> +/* successfully connected */
>> +s->state = NBD_CLIENT_CONNECTED;
>> +qemu_co_queue_restart_all(&s->free_sema);
>> +}
>> +
>> +static coroutine_fn void nbd_reconnect_loop(BDRVNBDState *s)
>> +{
>> +uint64_t start_time_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
>> +uint64_t delay_ns = s->reconnect_delay * NANOSECONDS_PER_SECOND;
>> +uint64_t timeout = 1 * NANOSECONDS_PER_SECOND;
>> +uint64_t max_timeout = 16 * NANOSECONDS_PER_SECOND;
>> +
>> +nbd_reconnect_attempt(s);
>> +
>> +while (nbd_client_connecting(s)) {
>> +if (s->state == NBD_CLIENT_CONNECTING_WAIT &&
>> +qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start_time_ns > delay_ns)
>> +{
>> +s->state = NBD_CLIENT_CONNECTING_NOWAIT;
>> +qemu_co_queue_restart_all(&s->free_sema);
>> +}
>> +
>> +qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, timeout,
>> + &s->connection_co_sleep_ns_state);
>> +if (s->drained) {
>> +bdrv_dec_in_flight(s->bs);
>> +s->wait_drained_end = true;
>> +while (s->drained) {
>> +/*
>> + * We may be entered once from nbd_client_attach_aio_context_bh
>> + * and then from nbd_client_co_drain_end. So here is a loop.
>> + */
>> +qemu_coroutine_yield();
>> + 

Re: [Qemu-devel] [PATCH v8 2/3] block/nbd: nbd reconnect

2019-08-21 Thread Eric Blake
On 8/21/19 11:52 AM, Vladimir Sementsov-Ogievskiy wrote:
> Implement reconnect. To achieve this:
> 
> 1. add new modes:
>    connecting-wait: reconnecting is in progress, and only a small number
>      of reconnect attempts have been made, so all requests wait for the
>      connection.
>    connecting-nowait: reconnecting is in progress, there have already been
>      many reconnect attempts, so all requests return errors.
> 
>two old modes are used too:
>connected: normal state
>quit: exiting after fatal error or on close
> 
> Possible transitions are:
> 
>* -> quit
>connecting-* -> connected
>connecting-wait -> connecting-nowait (transition is done after
>   reconnect-delay seconds in connecting-wait mode)
>connected -> connecting-wait
> 
> 2. Implement reconnect in connection_co. So, in connecting-* mode,
> connection_co tries to reconnect an unlimited number of times.
> 
> 3. Retry nbd queries on channel error if we are in the connecting-wait
> state.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---

> +static bool nbd_client_connecting(BDRVNBDState *s)
> +{
> +return s->state == NBD_CLIENT_CONNECTING_WAIT ||
> +s->state == NBD_CLIENT_CONNECTING_NOWAIT;


Indentation looks unusual. I might have done:

return (s->state == NBD_CLIENT_CONNECTING_WAIT ||
        s->state == NBD_CLIENT_CONNECTING_NOWAIT);

Or even exploit the enum encoding:

return s->state <= NBD_CLIENT_CONNECTING_NOWAIT

Is s->state updated atomically, or do we risk the case where we might
see two different values of s->state across the || sequence point?  Does
that matter?
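
If the enum shortcut is used, a guard along these lines would make the
ordering assumption explicit (just a sketch, assuming the enum values from
the patch and QEMU_BUILD_BUG_ON from "qemu/compiler.h"):

/* fail the build if the two connecting states ever stop being the
 * first two enumerators */
QEMU_BUILD_BUG_ON(NBD_CLIENT_CONNECTING_WAIT != 0 ||
                  NBD_CLIENT_CONNECTING_NOWAIT != 1);

static bool nbd_client_connecting(BDRVNBDState *s)
{
    return s->state <= NBD_CLIENT_CONNECTING_NOWAIT;
}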

> +}
> +
> +static bool nbd_client_connecting_wait(BDRVNBDState *s)
> +{
> +return s->state == NBD_CLIENT_CONNECTING_WAIT;
> +}
> +
> +static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s)
> +{
> +Error *local_err = NULL;
> +
> +if (!nbd_client_connecting(s)) {
> +return;
> +}
> +assert(nbd_client_connecting(s));

This assert adds nothing given the condition beforehand.

> +
> +/* Wait for completion of all in-flight requests */
> +
> +qemu_co_mutex_lock(&s->send_mutex);
> +
> +while (s->in_flight > 0) {
> +qemu_co_mutex_unlock(&s->send_mutex);
> +nbd_recv_coroutines_wake_all(s);
> +s->wait_in_flight = true;
> +qemu_coroutine_yield();
> +s->wait_in_flight = false;
> +qemu_co_mutex_lock(&s->send_mutex);
> +}
> +
> +qemu_co_mutex_unlock(&s->send_mutex);
> +
> +if (!nbd_client_connecting(s)) {
> +return;
> +}
> +
> +/*
> + * Now we are sure that nobody is accessing the channel, and no one will
> + * try until we set the state to CONNECTED.
> + */
> +
> +/* Finalize previous connection if any */
> +if (s->ioc) {
> +nbd_client_detach_aio_context(s->bs);
> +object_unref(OBJECT(s->sioc));
> +s->sioc = NULL;
> +object_unref(OBJECT(s->ioc));
> +s->ioc = NULL;
> +}
> +
> +s->connect_status = nbd_client_connect(s->bs, &local_err);
> +error_free(s->connect_err);
> +s->connect_err = NULL;
> +error_propagate(&s->connect_err, local_err);
> +local_err = NULL;
> +
> +if (s->connect_status < 0) {
> +/* failed attempt */
> +return;
> +}
> +
> +/* successfully connected */
> +s->state = NBD_CLIENT_CONNECTED;
> +qemu_co_queue_restart_all(&s->free_sema);
> +}
> +
> +static coroutine_fn void nbd_reconnect_loop(BDRVNBDState *s)
> +{
> +uint64_t start_time_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
> +uint64_t delay_ns = s->reconnect_delay * NANOSECONDS_PER_SECOND;
> +uint64_t timeout = 1 * NANOSECONDS_PER_SECOND;
> +uint64_t max_timeout = 16 * NANOSECONDS_PER_SECOND;
> +
> +nbd_reconnect_attempt(s);
> +
> +while (nbd_client_connecting(s)) {
> +if (s->state == NBD_CLIENT_CONNECTING_WAIT &&
> +qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start_time_ns > delay_ns)
> +{
> +s->state = NBD_CLIENT_CONNECTING_NOWAIT;
> +qemu_co_queue_restart_all(&s->free_sema);
> +}
> +
> +qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, timeout,
> + &s->connection_co_sleep_ns_state);
> +if (s->drained) {
> +bdrv_dec_in_flight(s->bs);
> +s->wait_drained_end = true;
> +while (s->drained) {
> +/*
> + * We may be entered once from nbd_client_attach_aio_context_bh
> + * and then from nbd_client_co_drain_end. So here is a loop.
> + */
> +qemu_coroutine_yield();
> +}
> +bdrv_inc_in_flight(s->bs);
> +}
> +if (timeout < max_timeout) {
> +timeout *= 2;
> +}
> +
> +nbd_reconnect_attempt(s);
> +}
>  }
>  
>  static coroutine_fn void nbd_connection_entry(void *opaque)
>  {
> -BDRVNBDState *s = opaque;
> +BDRVNBDState *s = 

[Qemu-devel] [PATCH v8 2/3] block/nbd: nbd reconnect

2019-08-21 Thread Vladimir Sementsov-Ogievskiy
Implement reconnect. To achieve this:

1. add new modes:
   connecting-wait: reconnecting is in progress, and only a small number
 of reconnect attempts have been made, so all requests wait for the
 connection.
   connecting-nowait: reconnecting is in progress, there have already been
 many reconnect attempts, so all requests return errors.

   two old modes are used too:
   connected: normal state
   quit: exiting after fatal error or on close

Possible transitions are:

   * -> quit
   connecting-* -> connected
   connecting-wait -> connecting-nowait (transition is done after
  reconnect-delay seconds in connecting-wait mode)
   connected -> connecting-wait

2. Implement reconnect in connection_co. So, in connecting-* mode,
connection_co tries to reconnect an unlimited number of times.

3. Retry nbd queries on channel error if we are in the connecting-wait
state.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
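
For reference, the mode set and transitions described above can be sketched
as a transition-validity check (illustrative only, not part of the patch; the
enum mirrors the one added to block/nbd.c):

#include <stdbool.h>

typedef enum NBDClientState {
    NBD_CLIENT_CONNECTING_WAIT,    /* reconnecting; requests wait */
    NBD_CLIENT_CONNECTING_NOWAIT,  /* reconnecting; requests fail fast */
    NBD_CLIENT_CONNECTED,          /* normal state */
    NBD_CLIENT_QUIT                /* fatal error or close */
} NBDClientState;

static bool nbd_state_transition_valid(NBDClientState from, NBDClientState to)
{
    if (to == NBD_CLIENT_QUIT) {
        return true;                                /* * -> quit */
    }
    switch (from) {
    case NBD_CLIENT_CONNECTING_WAIT:
        return to == NBD_CLIENT_CONNECTED ||        /* connecting-* -> connected */
               to == NBD_CLIENT_CONNECTING_NOWAIT;  /* after reconnect-delay */
    case NBD_CLIENT_CONNECTING_NOWAIT:
        return to == NBD_CLIENT_CONNECTED;
    case NBD_CLIENT_CONNECTED:
        return to == NBD_CLIENT_CONNECTING_WAIT;
    default:
        return false;                               /* quit is terminal */
    }
}
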
 block/nbd.c | 335 ++--
 1 file changed, 271 insertions(+), 64 deletions(-)

diff --git a/block/nbd.c b/block/nbd.c
index beed46fb34..f272154d4b 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -1,6 +1,7 @@
 /*
  * QEMU Block driver for  NBD
  *
+ * Copyright (c) 2019 Virtuozzo International GmbH.
  * Copyright (C) 2016 Red Hat, Inc.
  * Copyright (C) 2008 Bull S.A.S.
  * Author: Laurent Vivier 
@@ -55,6 +56,8 @@ typedef struct {
 } NBDClientRequest;
 
 typedef enum NBDClientState {
+NBD_CLIENT_CONNECTING_WAIT,
+NBD_CLIENT_CONNECTING_NOWAIT,
 NBD_CLIENT_CONNECTED,
 NBD_CLIENT_QUIT
 } NBDClientState;
@@ -67,8 +70,14 @@ typedef struct BDRVNBDState {
 CoMutex send_mutex;
 CoQueue free_sema;
 Coroutine *connection_co;
+void *connection_co_sleep_ns_state;
+bool drained;
+bool wait_drained_end;
 int in_flight;
 NBDClientState state;
+int connect_status;
+Error *connect_err;
+bool wait_in_flight;
 
 NBDClientRequest requests[MAX_NBD_REQUESTS];
 NBDReply reply;
@@ -83,10 +92,21 @@ typedef struct BDRVNBDState {
 char *x_dirty_bitmap;
 } BDRVNBDState;
 
-/* @ret will be used for reconnect in future */
+static int nbd_client_connect(BlockDriverState *bs, Error **errp);
+
 static void nbd_channel_error(BDRVNBDState *s, int ret)
 {
-s->state = NBD_CLIENT_QUIT;
+if (ret == -EIO) {
+if (s->state == NBD_CLIENT_CONNECTED) {
+s->state = s->reconnect_delay ? NBD_CLIENT_CONNECTING_WAIT :
+NBD_CLIENT_CONNECTING_NOWAIT;
+}
+} else {
+if (s->state == NBD_CLIENT_CONNECTED) {
+qio_channel_shutdown(s->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
+}
+s->state = NBD_CLIENT_QUIT;
+}
 }
 
 static void nbd_recv_coroutines_wake_all(BDRVNBDState *s)
@@ -129,7 +149,13 @@ static void nbd_client_attach_aio_context(BlockDriverState *bs,
 {
 BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
 
-qio_channel_attach_aio_context(QIO_CHANNEL(s->ioc), new_context);
+/*
+ * s->connection_co is either yielded from nbd_receive_reply or from
+ * nbd_reconnect_loop()
+ */
+if (s->state == NBD_CLIENT_CONNECTED) {
+qio_channel_attach_aio_context(QIO_CHANNEL(s->ioc), new_context);
+}
 
 bdrv_inc_in_flight(bs);
 
@@ -140,29 +166,157 @@ static void nbd_client_attach_aio_context(BlockDriverState *bs,
 aio_wait_bh_oneshot(new_context, nbd_client_attach_aio_context_bh, bs);
 }
 
+static void coroutine_fn nbd_client_co_drain_begin(BlockDriverState *bs)
+{
+BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
 
-static void nbd_teardown_connection(BlockDriverState *bs)
+s->drained = true;
+if (s->connection_co_sleep_ns_state) {
+qemu_co_sleep_wake(s->connection_co_sleep_ns_state);
+}
+}
+
+static void coroutine_fn nbd_client_co_drain_end(BlockDriverState *bs)
 {
 BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
 
-assert(s->ioc);
+s->drained = false;
+if (s->wait_drained_end) {
+s->wait_drained_end = false;
+aio_co_wake(s->connection_co);
+}
+}
+
 
-/* finish any pending coroutines */
-qio_channel_shutdown(s->ioc,
- QIO_CHANNEL_SHUTDOWN_BOTH,
- NULL);
+static void nbd_teardown_connection(BlockDriverState *bs)
+{
+BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
+
+if (s->state == NBD_CLIENT_CONNECTED) {
+/* finish any pending coroutines */
+assert(s->ioc);
+qio_channel_shutdown(s->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
+}
+s->state = NBD_CLIENT_QUIT;
+if (s->connection_co) {
+if (s->connection_co_sleep_ns_state) {
+qemu_co_sleep_wake(s->connection_co_sleep_ns_state);
+}
+}
 BDRV_POLL_WHILE(bs, s->connection_co);
+}
 
-nbd_client_detach_aio_context(bs);
-object_unref(OBJECT(s->sioc));
-s->sioc = NULL;
-object_unref(OBJECT(s->ioc));
-s->ioc = NULL;