On Thu, Jul 08, 2021 at 11:28:40AM +0300, Denis Plotnikov wrote:
> On vhost-user-blk migration, qemu normally sends a number of commands
> to enable logging if VHOST_USER_PROTOCOL_F_LOG_SHMFD is negotiated.
> Qemu sends VHOST_USER_SET_FEATURES to enable buffers logging and
> VHOST_USER_SET_VRING_ADDR per each started ring to enable "used ring"
> data logging.
> The issue is that qemu doesn't wait for reply from the vhost daemon
> for these commands which may result in races between qemu expectation
> of logging starting and actual login starting in vhost daemon.
> 
> The race can appear as follows: on migration setup, qemu enables dirty page
> logging by sending VHOST_USER_SET_FEATURES. The command doesn't arrive to a
> vhost-user-blk daemon immediately and the daemon needs some time to turn the
> logging on internally. If qemu doesn't wait for reply, after sending the
> command, qemu may start migrate memory pages to a destination. At this time,
> the logging may not be actually turned on in the daemon but some guest pages,
> which the daemon is about to write to, may have already been transferred
> without logging to the destination. Since the logging wasn't turned on,
> those pages won't be transferred again as dirty. So we may end up with
> corrupted data on the destination.
> The same scenario is applicable for "used ring" data logging, which is
> turned on with VHOST_USER_SET_VRING_ADDR command.
> 
> To resolve this issue, this patch makes qemu wait for the commands result
> explicilty if VHOST_USER_PROTOCOL_F_REPLY_ACK is negotiated.
> 
> Signed-off-by: Denis Plotnikov <den-plotni...@yandex-team.ru>
> ---
>  hw/virtio/vhost-user.c | 31 ++++++++++++++++++++++++++++---
>  1 file changed, 28 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> index ee57abe04526..15b5fac67cf3 100644
> --- a/hw/virtio/vhost-user.c
> +++ b/hw/virtio/vhost-user.c
> @@ -1105,10 +1105,20 @@ static int vhost_user_set_vring_addr(struct vhost_dev 
> *dev,
>          .hdr.size = sizeof(msg.payload.addr),
>      };
>  
> +    bool reply_supported = virtio_has_feature(dev->protocol_features,
> +                                              
> VHOST_USER_PROTOCOL_F_REPLY_ACK);
> +    if (reply_supported) {
> +        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
> +    }
> +
>      if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
>          return -1;
>      }
>  
> +    if (reply_supported) {
> +        return process_message_reply(dev, &msg);
> +    }
> +
>      return 0;
>  }
>  

Same - can we limit this to when logging is being enabled?

> @@ -1288,7 +1298,8 @@ static int vhost_user_set_vring_call(struct vhost_dev 
> *dev,
>      return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
>  }
>  
> -static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t 
> u64)
> +static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t 
> u64,
> +                              bool need_reply)
>  {
>      VhostUserMsg msg = {
>          .hdr.request = request,
> @@ -1297,23 +1308,37 @@ static int vhost_user_set_u64(struct vhost_dev *dev, 
> int request, uint64_t u64)
>          .hdr.size = sizeof(msg.payload.u64),
>      };
>  
> +    if (need_reply) {
> +        bool reply_supported = virtio_has_feature(dev->protocol_features,
> +                                          VHOST_USER_PROTOCOL_F_REPLY_ACK);
> +        if (reply_supported) {
> +            msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
> +        }
> +    }
> +
>      if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
>          return -1;
>      }
>  
> +    if (msg.hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
> +        return process_message_reply(dev, &msg);
> +    }
> +
>      return 0;
>  }
>  
>  static int vhost_user_set_features(struct vhost_dev *dev,
>                                     uint64_t features)
>  {
> -    return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features);
> +    return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features,
> +                              true);
>  }
>  

Same here. In fact, 

>  static int vhost_user_set_protocol_features(struct vhost_dev *dev,
>                                              uint64_t features)
>  {
> -    return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, 
> features);
> +    return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, 
> features,
> +                              false);
>  }
>  
>  static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t 
> *u64)
> -- 
> 2.25.1


Reply via email to