from:"Yuri Benditovich"

Re: [PATCH 2/3] virtio-net: Convert feature properties to OnOffAuto

2024-04-30 Thread Yuri Benditovich

Question:
How will libvirt (as an example) work with this change? In the
existing semantics of a libvirt profile, "on" means "on if possible".
After a QEMU update, will an existing profile still use "on", but now
with the meaning "force"?
Typically this is solved by machine type - if libvirt uses
"machine='pc-q35-8.1'", this will be backward-compatible.
How will this change be accepted?

On Sun, Apr 28, 2024 at 10:21 AM Akihiko Odaki  wrote:
>
> Some features are not always available, and virtio-net used to disable
> them when not available even if the corresponding properties were
> explicitly set to "on".
>
> Convert feature properties to OnOffAuto so that the user can explicitly
> tell QEMU to automatically select the value by setting them "auto".
> QEMU will give an error if they are set "on".
>
> Signed-off-by: Akihiko Odaki 
> ---
>  include/hw/virtio/virtio-net.h |   2 +-
>  hw/net/virtio-net.c| 247 
> +
>  2 files changed, 152 insertions(+), 97 deletions(-)
>
> diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
> index 060c23c04d2d..ff32e30f001b 100644
> --- a/include/hw/virtio/virtio-net.h
> +++ b/include/hw/virtio/virtio-net.h
> @@ -178,7 +178,7 @@ struct VirtIONet {
>  uint32_t has_vnet_hdr;
>  size_t host_hdr_len;
>  size_t guest_hdr_len;
> -uint64_t host_features;
> +OnOffAutoBit64 host_features;
>  uint32_t rsc_timeout;
>  uint8_t rsc4_enabled;
>  uint8_t rsc6_enabled;
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index c8059dc99bd4..5b6c901915a9 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -750,58 +750,96 @@ static void virtio_net_set_queue_pairs(VirtIONet *n)
>
>  static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
>
> +static bool virtio_net_clear_features(OnOffAutoBit64 *features,
> +  uint64_t clear_bits,
> +  const char *reason, Error **errp)
> +{
> +if (features->on_bits & clear_bits) {
> +error_setg(errp, "%s", reason);
> +return false;
> +}
> +
> +features->auto_bits &= ~clear_bits;
> +
> +return true;
> +}
> +
>  static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t 
> features,
>  Error **errp)
>  {
>  VirtIONet *n = VIRTIO_NET(vdev);
>  NetClientState *nc = qemu_get_queue(n->nic);
> -
> -/* Firstly sync all virtio-net possible supported features */
> -features |= n->host_features;
> -
> -virtio_add_feature(, VIRTIO_NET_F_MAC);
> -
> -if (!peer_has_vnet_hdr(n)) {
> -virtio_clear_feature(, VIRTIO_NET_F_CSUM);
> -virtio_clear_feature(, VIRTIO_NET_F_HOST_TSO4);
> -virtio_clear_feature(, VIRTIO_NET_F_HOST_TSO6);
> -virtio_clear_feature(, VIRTIO_NET_F_HOST_ECN);
> -
> -virtio_clear_feature(, VIRTIO_NET_F_GUEST_CSUM);
> -virtio_clear_feature(, VIRTIO_NET_F_GUEST_TSO4);
> -virtio_clear_feature(, VIRTIO_NET_F_GUEST_TSO6);
> -virtio_clear_feature(, VIRTIO_NET_F_GUEST_ECN);
> -
> -virtio_clear_feature(, VIRTIO_NET_F_HOST_USO);
> -virtio_clear_feature(, VIRTIO_NET_F_GUEST_USO4);
> -virtio_clear_feature(, VIRTIO_NET_F_GUEST_USO6);
> -
> -virtio_clear_feature(, VIRTIO_NET_F_HASH_REPORT);
> -}
> -
> -if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
> -virtio_clear_feature(, VIRTIO_NET_F_GUEST_UFO);
> -virtio_clear_feature(, VIRTIO_NET_F_HOST_UFO);
> -}
> -
> -if (!peer_has_uso(n)) {
> -virtio_clear_feature(, VIRTIO_NET_F_HOST_USO);
> -virtio_clear_feature(, VIRTIO_NET_F_GUEST_USO4);
> -virtio_clear_feature(, VIRTIO_NET_F_GUEST_USO6);
> +OnOffAutoBit64 on_off_auto_features = n->host_features;
> +
> +on_off_auto_features.on_bits |= features;
> +virtio_add_feature(_off_auto_features.auto_bits, VIRTIO_NET_F_MAC);
> +
> +if (!((peer_has_vnet_hdr(n) ||
> +   virtio_net_clear_features(_off_auto_features,
> + BIT_ULL(VIRTIO_NET_F_CSUM) |
> + BIT_ULL(VIRTIO_NET_F_HOST_TSO4) |
> + BIT_ULL(VIRTIO_NET_F_HOST_TSO6) |
> + BIT_ULL(VIRTIO_NET_F_HOST_ECN) |
> + BIT_ULL(VIRTIO_NET_F_GUEST_UFO) |
> + BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |
> + BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |
> + BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |
> + BIT_ULL(VIRTIO_NET_F_GUEST_ECN) |
> + BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
> + BIT_ULL(VIRTIO_NET_F_HOST_USO) |
> + BIT_ULL(VIRTIO_NET_F_GUEST_USO4) |
> +

Re: [PATCH 1/3] qdev-properties: Add DEFINE_PROP_ON_OFF_AUTO_BIT64()

2024-04-30 Thread Yuri Benditovich

On Sun, Apr 28, 2024 at 10:21 AM Akihiko Odaki  wrote:
>
> DEFINE_PROP_ON_OFF_AUTO_BIT64() corresponds to DEFINE_PROP_ON_OFF_AUTO()
> as DEFINE_PROP_BIT64() corresponds to DEFINE_PROP_BOOL(). The difference
> is that DEFINE_PROP_ON_OFF_AUTO_BIT64() exposes OnOffAuto instead of
> bool.
>
> Signed-off-by: Akihiko Odaki 
> ---
>  include/hw/qdev-properties.h | 18 
>  hw/core/qdev-properties.c| 65 
> +++-
>  2 files changed, 82 insertions(+), 1 deletion(-)
>
> diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h
> index 09aa04ca1e27..afec53a48470 100644
> --- a/include/hw/qdev-properties.h
> +++ b/include/hw/qdev-properties.h
> @@ -43,11 +43,22 @@ struct PropertyInfo {
>  ObjectPropertyRelease *release;
>  };
>
> +/**
> + * struct OnOffAutoBit64 - OnOffAuto storage with 64 elements.
> + * @on_bits: Bitmap of elements with "on".
> + * @auto_bits: Bitmap of elements with "auto".
> + */
> +typedef struct OnOffAutoBit64 {
> +uint64_t on_bits;
> +uint64_t auto_bits;
> +} OnOffAutoBit64;
> +
>
>  /*** qdev-properties.c ***/
>
>  extern const PropertyInfo qdev_prop_bit;
>  extern const PropertyInfo qdev_prop_bit64;
> +extern const PropertyInfo qdev_prop_on_off_auto_bit64;
>  extern const PropertyInfo qdev_prop_bool;
>  extern const PropertyInfo qdev_prop_enum;
>  extern const PropertyInfo qdev_prop_uint8;
> @@ -100,6 +111,13 @@ extern const PropertyInfo qdev_prop_link;
>  .set_default = true,  \
>  .defval.u  = (bool)_defval)
>
> +#define DEFINE_PROP_ON_OFF_AUTO_BIT64(_name, _state, _field, _bit, _defval) \
> +DEFINE_PROP(_name, _state, _field, qdev_prop_on_off_auto_bit64, \
> +OnOffAutoBit64, \
> +.bitnr= (_bit), \
> +.set_default = true,\
> +.defval.i = (OnOffAuto)_defval)
> +
>  #define DEFINE_PROP_BOOL(_name, _state, _field, _defval) \
>  DEFINE_PROP(_name, _state, _field, qdev_prop_bool, bool, \
>  .set_default = true, \
> diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c
> index 7d6fa726fdf2..b96f54a1b912 100644
> --- a/hw/core/qdev-properties.c
> +++ b/hw/core/qdev-properties.c
> @@ -188,7 +188,8 @@ const PropertyInfo qdev_prop_bit = {
>
>  static uint64_t qdev_get_prop_mask64(Property *prop)
>  {
> -assert(prop->info == _prop_bit64);
> +assert(prop->info == _prop_bit64 ||
> +   prop->info == _prop_on_off_auto_bit64);
>  return 0x1ull << prop->bitnr;
>  }
>
> @@ -233,6 +234,68 @@ const PropertyInfo qdev_prop_bit64 = {
>  .set_default_value = set_default_value_bool,
>  };
>
> +static void prop_get_on_off_auto_bit64(Object *obj, Visitor *v,
> +   const char *name, void *opaque,
> +   Error **errp)
> +{
> +Property *prop = opaque;
> +OnOffAutoBit64 *p = object_field_prop_ptr(obj, prop);
> +int value;
> +uint64_t mask = qdev_get_prop_mask64(prop);
> +
> +if (p->auto_bits & mask) {
> +value = ON_OFF_AUTO_AUTO;
> +} else if (p->on_bits & mask) {
> +value = ON_OFF_AUTO_ON;
> +} else {
> +value = ON_OFF_AUTO_OFF;
> +}
> +
> +visit_type_enum(v, name, , _lookup, errp);
> +}
> +
> +static void prop_set_on_off_auto_bit64(Object *obj, Visitor *v,
> +   const char *name, void *opaque,
> +   Error **errp)
> +{
> +Property *prop = opaque;
> +OnOffAutoBit64 *p = object_field_prop_ptr(obj, prop);
> +int value;
> +uint64_t mask = qdev_get_prop_mask64(prop);
> +
> +if (!visit_type_enum(v, name, , _lookup, errp)) {
> +return;
> +}
> +
> +switch (value) {
> +case ON_OFF_AUTO_AUTO:
> +p->on_bits &= ~mask;
> +p->auto_bits |= mask;
> +break;
> +
> +case ON_OFF_AUTO_ON:
> +p->on_bits |= mask;
> +p->auto_bits &= ~mask;
> +break;
> +
> +case ON_OFF_AUTO_OFF:
> +p->on_bits &= ~mask;
> +p->auto_bits &= ~mask;
> +break;
> +}
> +}
> +
> +const PropertyInfo qdev_prop_on_off_auto_bit64 = {
> +.name  = "bool",

Does this mean that the name of this tristate type is "bool"? Or am I missing something?

> +.description = "on/off/auto",
> +.enum_table = _lookup,
> +.get = qdev_propinfo_get_enum,
> +.set = qdev_propinfo_set_enum,
> +.get = prop_get_on_off_auto_bit64,
> +.set = prop_set_on_off_auto_bit64,
> +.set_default_value = qdev_propinfo_set_default_value_enum,
> +};
> +
>  /* --- bool --- */
>
>  static void get_bool(Object *obj, Visitor *v, const char *name, void *opaque,
>
> --
> 2.44.0
>

Re: [PATCH v9 13/20] virtio-net: Return an error when vhost cannot enable RSS

2024-04-20 Thread Yuri Benditovich

On Tue, Apr 16, 2024 at 9:54 AM Akihiko Odaki  wrote:
>
> On 2024/04/16 13:00, Jason Wang wrote:
> > On Mon, Apr 15, 2024 at 10:05 PM Yuri Benditovich
> >  wrote:
> >>
> >> On Wed, Apr 3, 2024 at 2:11 PM Akihiko Odaki  
> >> wrote:
> >>>
> >>> vhost requires eBPF for RSS. When eBPF is not available, virtio-net
> >>> implicitly disables RSS even if the user explicitly requests it. Return
> >>> an error instead of implicitly disabling RSS if RSS is requested but not
> >>> available.
> >>>
> >>> Signed-off-by: Akihiko Odaki 
> >>> ---
> >>>   hw/net/virtio-net.c | 97 
> >>> ++---
> >>>   1 file changed, 48 insertions(+), 49 deletions(-)
> >>>
> >>> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> >>> index 61b49e335dea..3d53eba88cfc 100644
> >>> --- a/hw/net/virtio-net.c
> >>> +++ b/hw/net/virtio-net.c
> >>> @@ -793,9 +793,6 @@ static uint64_t virtio_net_get_features(VirtIODevice 
> >>> *vdev, uint64_t features,
> >>>   return features;
> >>>   }
> >>>
> >>> -if (!ebpf_rss_is_loaded(>ebpf_rss)) {
> >>> -virtio_clear_feature(, VIRTIO_NET_F_RSS);
> >>> -}
> >>>   features = vhost_net_get_features(get_vhost_net(nc->peer), 
> >>> features);
> >>>   vdev->backend_features = features;
> >>>
> >>> @@ -3591,6 +3588,50 @@ static bool 
> >>> failover_hide_primary_device(DeviceListener *listener,
> >>>   return qatomic_read(>failover_primary_hidden);
> >>>   }
> >>>
> >>> +static void virtio_net_device_unrealize(DeviceState *dev)
> >>> +{
> >>> +VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> >>> +VirtIONet *n = VIRTIO_NET(dev);
> >>> +int i, max_queue_pairs;
> >>> +
> >>> +if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
> >>> +virtio_net_unload_ebpf(n);
> >>> +}
> >>> +
> >>> +/* This will stop vhost backend if appropriate. */
> >>> +virtio_net_set_status(vdev, 0);
> >>> +
> >>> +g_free(n->netclient_name);
> >>> +n->netclient_name = NULL;
> >>> +g_free(n->netclient_type);
> >>> +n->netclient_type = NULL;
> >>> +
> >>> +g_free(n->mac_table.macs);
> >>> +g_free(n->vlans);
> >>> +
> >>> +if (n->failover) {
> >>> +qobject_unref(n->primary_opts);
> >>> +device_listener_unregister(>primary_listener);
> >>> +migration_remove_notifier(>migration_state);
> >>> +} else {
> >>> +assert(n->primary_opts == NULL);
> >>> +}
> >>> +
> >>> +max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
> >>> +for (i = 0; i < max_queue_pairs; i++) {
> >>> +virtio_net_del_queue(n, i);
> >>> +}
> >>> +/* delete also control vq */
> >>> +virtio_del_queue(vdev, max_queue_pairs * 2);
> >>> +qemu_announce_timer_del(>announce_timer, false);
> >>> +g_free(n->vqs);
> >>> +qemu_del_nic(n->nic);
> >>> +virtio_net_rsc_cleanup(n);
> >>> +g_free(n->rss_data.indirections_table);
> >>> +net_rx_pkt_uninit(n->rx_pkt);
> >>> +virtio_cleanup(vdev);
> >>> +}
> >>> +
> >>>   static void virtio_net_device_realize(DeviceState *dev, Error **errp)
> >>>   {
> >>>   VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> >>> @@ -3760,53 +3801,11 @@ static void virtio_net_device_realize(DeviceState 
> >>> *dev, Error **errp)
> >>>
> >>>   net_rx_pkt_init(>rx_pkt);
> >>>
> >>> -if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
> >>> -virtio_net_load_ebpf(n);
> >>> -}
> >>> -}
> >>> -
> >>> -static void virtio_net_device_unrealize(DeviceState *dev)
> >>> -{
> >>> -VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> >>> -VirtIONet *n = VIRTIO_NET(dev);
> >>> -int i, max_queue_pairs;
> >>> -
> >>> -if (virtio_has_f

Re: [PATCH v9 13/20] virtio-net: Return an error when vhost cannot enable RSS

2024-04-16 Thread Yuri Benditovich

On Wed, Apr 3, 2024 at 2:11 PM Akihiko Odaki  wrote:
>
> vhost requires eBPF for RSS. When eBPF is not available, virtio-net
> implicitly disables RSS even if the user explicitly requests it. Return
> an error instead of implicitly disabling RSS if RSS is requested but not
> available.
>
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/virtio-net.c | 97 
> ++---
>  1 file changed, 48 insertions(+), 49 deletions(-)
>
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index 61b49e335dea..3d53eba88cfc 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -793,9 +793,6 @@ static uint64_t virtio_net_get_features(VirtIODevice 
> *vdev, uint64_t features,
>  return features;
>  }
>
> -if (!ebpf_rss_is_loaded(>ebpf_rss)) {
> -virtio_clear_feature(, VIRTIO_NET_F_RSS);
> -}
>  features = vhost_net_get_features(get_vhost_net(nc->peer), features);
>  vdev->backend_features = features;
>
> @@ -3591,6 +3588,50 @@ static bool 
> failover_hide_primary_device(DeviceListener *listener,
>  return qatomic_read(>failover_primary_hidden);
>  }
>
> +static void virtio_net_device_unrealize(DeviceState *dev)
> +{
> +VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +VirtIONet *n = VIRTIO_NET(dev);
> +int i, max_queue_pairs;
> +
> +if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
> +virtio_net_unload_ebpf(n);
> +}
> +
> +/* This will stop vhost backend if appropriate. */
> +virtio_net_set_status(vdev, 0);
> +
> +g_free(n->netclient_name);
> +n->netclient_name = NULL;
> +g_free(n->netclient_type);
> +n->netclient_type = NULL;
> +
> +g_free(n->mac_table.macs);
> +g_free(n->vlans);
> +
> +if (n->failover) {
> +qobject_unref(n->primary_opts);
> +device_listener_unregister(>primary_listener);
> +migration_remove_notifier(>migration_state);
> +} else {
> +assert(n->primary_opts == NULL);
> +}
> +
> +max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
> +for (i = 0; i < max_queue_pairs; i++) {
> +virtio_net_del_queue(n, i);
> +}
> +/* delete also control vq */
> +virtio_del_queue(vdev, max_queue_pairs * 2);
> +qemu_announce_timer_del(>announce_timer, false);
> +g_free(n->vqs);
> +qemu_del_nic(n->nic);
> +virtio_net_rsc_cleanup(n);
> +g_free(n->rss_data.indirections_table);
> +net_rx_pkt_uninit(n->rx_pkt);
> +virtio_cleanup(vdev);
> +}
> +
>  static void virtio_net_device_realize(DeviceState *dev, Error **errp)
>  {
>  VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> @@ -3760,53 +3801,11 @@ static void virtio_net_device_realize(DeviceState 
> *dev, Error **errp)
>
>  net_rx_pkt_init(>rx_pkt);
>
> -if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
> -virtio_net_load_ebpf(n);
> -}
> -}
> -
> -static void virtio_net_device_unrealize(DeviceState *dev)
> -{
> -VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> -VirtIONet *n = VIRTIO_NET(dev);
> -int i, max_queue_pairs;
> -
> -if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
> -virtio_net_unload_ebpf(n);
> +if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS) &&
> +!virtio_net_load_ebpf(n) && get_vhost_net(nc->peer)) {
> +virtio_net_device_unrealize(dev);
> +error_setg(errp, "Can't load eBPF RSS for vhost");

One more thing:
In case of failure, the message (which will be visible to the user, if
I'm not mistaken) should be clearer and should suggest disabling the
'rss' feature.

>  }
> -
> -/* This will stop vhost backend if appropriate. */
> -virtio_net_set_status(vdev, 0);
> -
> -g_free(n->netclient_name);
> -n->netclient_name = NULL;
> -g_free(n->netclient_type);
> -n->netclient_type = NULL;
> -
> -g_free(n->mac_table.macs);
> -g_free(n->vlans);
> -
> -if (n->failover) {
> -qobject_unref(n->primary_opts);
> -device_listener_unregister(>primary_listener);
> -migration_remove_notifier(>migration_state);
> -} else {
> -assert(n->primary_opts == NULL);
> -}
> -
> -max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
> -for (i = 0; i < max_queue_pairs; i++) {
> -virtio_net_del_queue(n, i);
> -}
> -/* delete also control vq */
> -virtio_del_queue(vdev, max_queue_pairs * 2);
> -qemu_announce_timer_del(>announce_timer, false);
> -g_free(n->vqs);
> -qemu_del_nic(n->nic);
> -virtio_net_rsc_cleanup(n);
> -g_free(n->rss_data.indirections_table);
> -net_rx_pkt_uninit(n->rx_pkt);
> -virtio_cleanup(vdev);
>  }
>
>  static void virtio_net_reset(VirtIODevice *vdev)
>
> --
> 2.44.0
>

Re: [PATCH v9 13/20] virtio-net: Return an error when vhost cannot enable RSS

2024-04-16 Thread Yuri Benditovich

On Tue, Apr 16, 2024 at 10:14 AM Jason Wang  wrote:
>
> On Tue, Apr 16, 2024 at 1:43 PM Yuri Benditovich
>  wrote:
> >
> > On Tue, Apr 16, 2024 at 7:00 AM Jason Wang  wrote:
> > >
> > > On Mon, Apr 15, 2024 at 10:05 PM Yuri Benditovich
> > >  wrote:
> > > >
> > > > On Wed, Apr 3, 2024 at 2:11 PM Akihiko Odaki  
> > > > wrote:
> > > > >
> > > > > vhost requires eBPF for RSS. When eBPF is not available, virtio-net
> > > > > implicitly disables RSS even if the user explicitly requests it. 
> > > > > Return
> > > > > an error instead of implicitly disabling RSS if RSS is requested but 
> > > > > not
> > > > > available.
> > > > >
> > > > > Signed-off-by: Akihiko Odaki 
> > > > > ---
> > > > >  hw/net/virtio-net.c | 97 
> > > > > ++---
> > > > >  1 file changed, 48 insertions(+), 49 deletions(-)
> > > > >
> > > > > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> > > > > index 61b49e335dea..3d53eba88cfc 100644
> > > > > --- a/hw/net/virtio-net.c
> > > > > +++ b/hw/net/virtio-net.c
> > > > > @@ -793,9 +793,6 @@ static uint64_t 
> > > > > virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
> > > > >  return features;
> > > > >  }
> > > > >
> > > > > -if (!ebpf_rss_is_loaded(>ebpf_rss)) {
> > > > > -virtio_clear_feature(, VIRTIO_NET_F_RSS);
> > > > > -}
> > > > >  features = vhost_net_get_features(get_vhost_net(nc->peer), 
> > > > > features);
> > > > >  vdev->backend_features = features;
> > > > >
> > > > > @@ -3591,6 +3588,50 @@ static bool 
> > > > > failover_hide_primary_device(DeviceListener *listener,
> > > > >  return qatomic_read(>failover_primary_hidden);
> > > > >  }
> > > > >
> > > > > +static void virtio_net_device_unrealize(DeviceState *dev)
> > > > > +{
> > > > > +VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > > > +VirtIONet *n = VIRTIO_NET(dev);
> > > > > +int i, max_queue_pairs;
> > > > > +
> > > > > +if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
> > > > > +virtio_net_unload_ebpf(n);
> > > > > +}
> > > > > +
> > > > > +/* This will stop vhost backend if appropriate. */
> > > > > +virtio_net_set_status(vdev, 0);
> > > > > +
> > > > > +g_free(n->netclient_name);
> > > > > +n->netclient_name = NULL;
> > > > > +g_free(n->netclient_type);
> > > > > +n->netclient_type = NULL;
> > > > > +
> > > > > +g_free(n->mac_table.macs);
> > > > > +g_free(n->vlans);
> > > > > +
> > > > > +if (n->failover) {
> > > > > +qobject_unref(n->primary_opts);
> > > > > +device_listener_unregister(>primary_listener);
> > > > > +migration_remove_notifier(>migration_state);
> > > > > +} else {
> > > > > +assert(n->primary_opts == NULL);
> > > > > +}
> > > > > +
> > > > > +max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
> > > > > +for (i = 0; i < max_queue_pairs; i++) {
> > > > > +virtio_net_del_queue(n, i);
> > > > > +}
> > > > > +/* delete also control vq */
> > > > > +virtio_del_queue(vdev, max_queue_pairs * 2);
> > > > > +qemu_announce_timer_del(>announce_timer, false);
> > > > > +g_free(n->vqs);
> > > > > +qemu_del_nic(n->nic);
> > > > > +virtio_net_rsc_cleanup(n);
> > > > > +g_free(n->rss_data.indirections_table);
> > > > > +net_rx_pkt_uninit(n->rx_pkt);
> > > > > +virtio_cleanup(vdev);
> > > > > +}
> > > > > +
> > > > >  static void virtio_net_device_realize(DeviceState *dev, Error **errp)
> > > > >  {
> > > > >  VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > &g

Re: [PATCH v9 13/20] virtio-net: Return an error when vhost cannot enable RSS

2024-04-15 Thread Yuri Benditovich

On Tue, Apr 16, 2024 at 7:00 AM Jason Wang  wrote:
>
> On Mon, Apr 15, 2024 at 10:05 PM Yuri Benditovich
>  wrote:
> >
> > On Wed, Apr 3, 2024 at 2:11 PM Akihiko Odaki  
> > wrote:
> > >
> > > vhost requires eBPF for RSS. When eBPF is not available, virtio-net
> > > implicitly disables RSS even if the user explicitly requests it. Return
> > > an error instead of implicitly disabling RSS if RSS is requested but not
> > > available.
> > >
> > > Signed-off-by: Akihiko Odaki 
> > > ---
> > >  hw/net/virtio-net.c | 97 
> > > ++---
> > >  1 file changed, 48 insertions(+), 49 deletions(-)
> > >
> > > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> > > index 61b49e335dea..3d53eba88cfc 100644
> > > --- a/hw/net/virtio-net.c
> > > +++ b/hw/net/virtio-net.c
> > > @@ -793,9 +793,6 @@ static uint64_t virtio_net_get_features(VirtIODevice 
> > > *vdev, uint64_t features,
> > >  return features;
> > >  }
> > >
> > > -if (!ebpf_rss_is_loaded(>ebpf_rss)) {
> > > -virtio_clear_feature(, VIRTIO_NET_F_RSS);
> > > -}
> > >  features = vhost_net_get_features(get_vhost_net(nc->peer), features);
> > >  vdev->backend_features = features;
> > >
> > > @@ -3591,6 +3588,50 @@ static bool 
> > > failover_hide_primary_device(DeviceListener *listener,
> > >  return qatomic_read(>failover_primary_hidden);
> > >  }
> > >
> > > +static void virtio_net_device_unrealize(DeviceState *dev)
> > > +{
> > > +VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > +VirtIONet *n = VIRTIO_NET(dev);
> > > +int i, max_queue_pairs;
> > > +
> > > +if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
> > > +virtio_net_unload_ebpf(n);
> > > +}
> > > +
> > > +/* This will stop vhost backend if appropriate. */
> > > +virtio_net_set_status(vdev, 0);
> > > +
> > > +g_free(n->netclient_name);
> > > +n->netclient_name = NULL;
> > > +g_free(n->netclient_type);
> > > +n->netclient_type = NULL;
> > > +
> > > +g_free(n->mac_table.macs);
> > > +g_free(n->vlans);
> > > +
> > > +if (n->failover) {
> > > +qobject_unref(n->primary_opts);
> > > +device_listener_unregister(>primary_listener);
> > > +migration_remove_notifier(>migration_state);
> > > +} else {
> > > +assert(n->primary_opts == NULL);
> > > +}
> > > +
> > > +max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
> > > +for (i = 0; i < max_queue_pairs; i++) {
> > > +virtio_net_del_queue(n, i);
> > > +}
> > > +/* delete also control vq */
> > > +virtio_del_queue(vdev, max_queue_pairs * 2);
> > > +qemu_announce_timer_del(>announce_timer, false);
> > > +g_free(n->vqs);
> > > +qemu_del_nic(n->nic);
> > > +virtio_net_rsc_cleanup(n);
> > > +g_free(n->rss_data.indirections_table);
> > > +net_rx_pkt_uninit(n->rx_pkt);
> > > +virtio_cleanup(vdev);
> > > +}
> > > +
> > >  static void virtio_net_device_realize(DeviceState *dev, Error **errp)
> > >  {
> > >  VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > @@ -3760,53 +3801,11 @@ static void virtio_net_device_realize(DeviceState 
> > > *dev, Error **errp)
> > >
> > >  net_rx_pkt_init(>rx_pkt);
> > >
> > > -if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
> > > -virtio_net_load_ebpf(n);
> > > -}
> > > -}
> > > -
> > > -static void virtio_net_device_unrealize(DeviceState *dev)
> > > -{
> > > -VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> > > -VirtIONet *n = VIRTIO_NET(dev);
> > > -int i, max_queue_pairs;
> > > -
> > > -if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
> > > -virtio_net_unload_ebpf(n);
> > > +if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS) &&
> > > +!virtio_net_load_ebpf(n) && get_vhost_net(nc->peer)) {
> > > +virtio_net_device_unrealize(dev);
> > > +error_setg(errp, "

Re: [PATCH v9 13/20] virtio-net: Return an error when vhost cannot enable RSS

2024-04-15 Thread Yuri Benditovich

On Wed, Apr 3, 2024 at 2:11 PM Akihiko Odaki  wrote:
>
> vhost requires eBPF for RSS. When eBPF is not available, virtio-net
> implicitly disables RSS even if the user explicitly requests it. Return
> an error instead of implicitly disabling RSS if RSS is requested but not
> available.
>
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/virtio-net.c | 97 
> ++---
>  1 file changed, 48 insertions(+), 49 deletions(-)
>
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index 61b49e335dea..3d53eba88cfc 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -793,9 +793,6 @@ static uint64_t virtio_net_get_features(VirtIODevice 
> *vdev, uint64_t features,
>  return features;
>  }
>
> -if (!ebpf_rss_is_loaded(>ebpf_rss)) {
> -virtio_clear_feature(, VIRTIO_NET_F_RSS);
> -}
>  features = vhost_net_get_features(get_vhost_net(nc->peer), features);
>  vdev->backend_features = features;
>
> @@ -3591,6 +3588,50 @@ static bool 
> failover_hide_primary_device(DeviceListener *listener,
>  return qatomic_read(>failover_primary_hidden);
>  }
>
> +static void virtio_net_device_unrealize(DeviceState *dev)
> +{
> +VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +VirtIONet *n = VIRTIO_NET(dev);
> +int i, max_queue_pairs;
> +
> +if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
> +virtio_net_unload_ebpf(n);
> +}
> +
> +/* This will stop vhost backend if appropriate. */
> +virtio_net_set_status(vdev, 0);
> +
> +g_free(n->netclient_name);
> +n->netclient_name = NULL;
> +g_free(n->netclient_type);
> +n->netclient_type = NULL;
> +
> +g_free(n->mac_table.macs);
> +g_free(n->vlans);
> +
> +if (n->failover) {
> +qobject_unref(n->primary_opts);
> +device_listener_unregister(>primary_listener);
> +migration_remove_notifier(>migration_state);
> +} else {
> +assert(n->primary_opts == NULL);
> +}
> +
> +max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
> +for (i = 0; i < max_queue_pairs; i++) {
> +virtio_net_del_queue(n, i);
> +}
> +/* delete also control vq */
> +virtio_del_queue(vdev, max_queue_pairs * 2);
> +qemu_announce_timer_del(>announce_timer, false);
> +g_free(n->vqs);
> +qemu_del_nic(n->nic);
> +virtio_net_rsc_cleanup(n);
> +g_free(n->rss_data.indirections_table);
> +net_rx_pkt_uninit(n->rx_pkt);
> +virtio_cleanup(vdev);
> +}
> +
>  static void virtio_net_device_realize(DeviceState *dev, Error **errp)
>  {
>  VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> @@ -3760,53 +3801,11 @@ static void virtio_net_device_realize(DeviceState 
> *dev, Error **errp)
>
>  net_rx_pkt_init(>rx_pkt);
>
> -if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
> -virtio_net_load_ebpf(n);
> -}
> -}
> -
> -static void virtio_net_device_unrealize(DeviceState *dev)
> -{
> -VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> -VirtIONet *n = VIRTIO_NET(dev);
> -int i, max_queue_pairs;
> -
> -if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
> -virtio_net_unload_ebpf(n);
> +if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS) &&
> +!virtio_net_load_ebpf(n) && get_vhost_net(nc->peer)) {
> +virtio_net_device_unrealize(dev);
> +error_setg(errp, "Can't load eBPF RSS for vhost");
>  }

As I already mentioned, I think it is an extremely bad idea to
fail to run qemu for such a reason as the absence of one feature.
What I suggest is:
1. Redefine rss as tri-state (off|auto|on)
2. Fail to run only if rss is on and not available via ebpf
3. On auto - silently drop it
4. The same with the 'hash' option - it is not compatible with vhost (at
least at the moment)
5. Restructure the patch, as it is hard to review when entire procedures
are replaced, i.e. use one patch that moves the code without changes
and another one with the real changes.
If this is hard to review only for me - please ignore that.

> -
> -/* This will stop vhost backend if appropriate. */
> -virtio_net_set_status(vdev, 0);
> -
> -g_free(n->netclient_name);
> -n->netclient_name = NULL;
> -g_free(n->netclient_type);
> -n->netclient_type = NULL;
> -
> -g_free(n->mac_table.macs);
> -g_free(n->vlans);
> -
> -if (n->failover) {
> -qobject_unref(n->primary_opts);
> -device_listener_unregister(>primary_listener);
> -migration_remove_notifier(>migration_state);
> -} else {
> -assert(n->primary_opts == NULL);
> -}
> -
> -max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
> -for (i = 0; i < max_queue_pairs; i++) {
> -virtio_net_del_queue(n, i);
> -}
> -/* delete also control vq */
> -virtio_del_queue(vdev, max_queue_pairs * 2);
> -qemu_announce_timer_del(>announce_timer, false);
> -g_free(n->vqs);
> -qemu_del_nic(n->nic);
> -

Re: [PATCH v9 17/20] ebpf: Fix RSS error handling

2024-04-13 Thread Yuri Benditovich

On Wed, Apr 3, 2024 at 2:12 PM Akihiko Odaki  wrote:
>
> calculate_rss_hash() was using hash value 0 to tell if it calculated
> a hash, but the hash value may be 0 on a rare occasion. Have a
> distinct bool value for correctness.

This is an interesting question: whether in reality the hash value might
be 0 or not.
On one hand, this seems like a kind of fix.
On the other hand, this adds computation cycles for each packet, and the
corner case that this aims to fix seems hardly reachable, if at all.
An optimistic estimate is 2.5*10^-8 percent of address:address:port triplets.
I would suggest at least finding some proof that the calculated
hash might be 0 in a real case where source addresses are not random.

>
> Fixes: f3fa412de2 ("ebpf: Added eBPF RSS program.")
> Signed-off-by: Akihiko Odaki 
> ---
>  ebpf/rss.bpf.skeleton.h | 1210 
> +++
>  tools/ebpf/rss.bpf.c|   20 +-
>  2 files changed, 610 insertions(+), 620 deletions(-)
>
> diff --git a/ebpf/rss.bpf.skeleton.h b/ebpf/rss.bpf.skeleton.h
> index aed4ef9a0335..e41ed8890191 100644
> --- a/ebpf/rss.bpf.skeleton.h
> +++ b/ebpf/rss.bpf.skeleton.h
> @@ -165,7 +165,7 @@ rss_bpf__create_skeleton(struct rss_bpf *obj)
> s->progs[0].prog = >progs.tun_rss_steering_prog;
> s->progs[0].link = >links.tun_rss_steering_prog;
>
> -   s->data = (void *)rss_bpf__elf_bytes(>data_sz);
> +   s->data = rss_bpf__elf_bytes(>data_sz);
>
> obj->skeleton = s;
> return 0;
> @@ -176,194 +176,188 @@ err:
>
>  static inline const void *rss_bpf__elf_bytes(size_t *sz)
>  {
> -   *sz = 20600;
> -   return (const void *)"\
> +   static const char data[] __attribute__((__aligned__(8))) = "\
>  
> \x7f\x45\x4c\x46\x02\x01\x01\0\0\0\0\0\0\0\0\0\x01\0\xf7\0\x01\0\0\0\0\0\0\0\0\
> -\0\0\0\0\0\0\0\0\0\0\0\x38\x4d\0\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\x40\0\x0d\0\
> -\x01\0\xbf\x19\0\0\0\0\0\0\xb7\x01\0\0\0\0\0\0\x63\x1a\x4c\xff\0\0\0\0\xbf\xa7\
> -\0\0\0\0\0\0\x07\x07\0\0\x4c\xff\xff\xff\x18\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
> +\0\0\0\0\0\0\0\0\0\0\0\xb8\x4b\0\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\x40\0\x0d\0\
> +\x01\0\xbf\x19\0\0\0\0\0\0\xb7\x01\0\0\0\0\0\0\x63\x1a\x54\xff\0\0\0\0\xbf\xa7\
> +\0\0\0\0\0\0\x07\x07\0\0\x54\xff\xff\xff\x18\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
>  
> \xbf\x72\0\0\0\0\0\0\x85\0\0\0\x01\0\0\0\xbf\x06\0\0\0\0\0\0\x18\x01\0\0\0\0\0\
>  
> \0\0\0\0\0\0\0\0\0\xbf\x72\0\0\0\0\0\0\x85\0\0\0\x01\0\0\0\xbf\x07\0\0\0\0\0\0\
> -\x18\0\0\0\xff\xff\xff\xff\0\0\0\0\0\0\0\0\x15\x06\x61\x02\0\0\0\0\xbf\x78\0\0\
> -\0\0\0\0\x15\x08\x5f\x02\0\0\0\0\x71\x61\0\0\0\0\0\0\x55\x01\x01\0\0\0\0\0\x05\
> -\0\x58\x02\0\0\0\0\xb7\x01\0\0\0\0\0\0\x63\x1a\xc0\xff\0\0\0\0\x7b\x1a\xb8\xff\
> -\0\0\0\0\x7b\x1a\xb0\xff\0\0\0\0\x7b\x1a\xa8\xff\0\0\0\0\x7b\x1a\xa0\xff\0\0\0\
> -\0\x63\x1a\x98\xff\0\0\0\0\x7b\x1a\x90\xff\0\0\0\0\x7b\x1a\x88\xff\0\0\0\0\x7b\
> -\x1a\x80\xff\0\0\0\0\x7b\x1a\x78\xff\0\0\0\0\x7b\x1a\x70\xff\0\0\0\0\x7b\x1a\
> -\x68\xff\0\0\0\0\x7b\x1a\x60\xff\0\0\0\0\x7b\x1a\x58\xff\0\0\0\0\x7b\x1a\x50\
> -\xff\0\0\0\0\x15\x09\x47\x02\0\0\0\0\x6b\x1a\xc8\xff\0\0\0\0\xbf\xa3\0\0\0\0\0\
> -\0\x07\x03\0\0\xc8\xff\xff\xff\xbf\x91\0\0\0\0\0\0\xb7\x02\0\0\x0c\0\0\0\xb7\
> +\x18\0\0\0\xff\xff\xff\xff\0\0\0\0\0\0\0\0\x15\x06\x4f\x02\0\0\0\0\xbf\x78\0\0\
> +\0\0\0\0\x15\x08\x4d\x02\0\0\0\0\x71\x61\0\0\0\0\0\0\x55\x01\x01\0\0\0\0\0\x05\
> +\0\x46\x02\0\0\0\0\xb7\x01\0\0\0\0\0\0\x63\x1a\xc8\xff\0\0\0\0\x7b\x1a\xc0\xff\
> +\0\0\0\0\x7b\x1a\xb8\xff\0\0\0\0\x7b\x1a\xb0\xff\0\0\0\0\x7b\x1a\xa8\xff\0\0\0\
> +\0\x63\x1a\xa0\xff\0\0\0\0\x7b\x1a\x98\xff\0\0\0\0\x7b\x1a\x90\xff\0\0\0\0\x7b\
> +\x1a\x88\xff\0\0\0\0\x7b\x1a\x80\xff\0\0\0\0\x7b\x1a\x78\xff\0\0\0\0\x7b\x1a\
> +\x70\xff\0\0\0\0\x7b\x1a\x68\xff\0\0\0\0\x7b\x1a\x60\xff\0\0\0\0\x7b\x1a\x58\
> +\xff\0\0\0\0\x15\x09\x35\x02\0\0\0\0\x6b\x1a\xd0\xff\0\0\0\0\xbf\xa3\0\0\0\0\0\
> +\0\x07\x03\0\0\xd0\xff\xff\xff\xbf\x91\0\0\0\0\0\0\xb7\x02\0\0\x0c\0\0\0\xb7\
>  
> \x04\0\0\x02\0\0\0\xb7\x05\0\0\0\0\0\0\x85\0\0\0\x44\0\0\0\x67\0\0\0\x20\0\0\0\
> -\x77\0\0\0\x20\0\0\0\x55\0\x3c\x02\0\0\0\0\xb7\x02\0\0\x10\0\0\0\x69\xa1\xc8\
> +\x77\0\0\0\x20\0\0\0\x55\0\x2a\x02\0\0\0\0\xb7\x02\0\0\x10\0\0\0\x69\xa1\xd0\
>  
> \xff\0\0\0\0\xbf\x13\0\0\0\0\0\0\xdc\x03\0\0\x10\0\0\0\x15\x03\x02\0\0\x81\0\0\
>  
> \x55\x03\x0b\0\xa8\x88\0\0\xb7\x02\0\0\x14\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\
> -\0\xc8\xff\xff\xff\xbf\x91\0\0\0\0\0\0\xb7\x04\0\0\x02\0\0\0\xb7\x05\0\0\0\0\0\
> -\0\x85\0\0\0\x44\0\0\0\x67\0\0\0\x20\0\0\0\x77\0\0\0\x20\0\0\0\x55\0\x2c\x02\0\
> -\0\0\0\x69\xa1\xc8\xff\0\0\0\0\x15\x01\x2a\x02\0\0\0\0\x7b\x9a\x38\xff\0\0\0\0\
> -\x15\x01\x56\0\x86\xdd\0\0\x55\x01\x3b\0\x08\0\0\0\xb7\x01\0\0\x01\0\0\0\x73\
> -\x1a\x50\xff\0\0\0\0\xb7\x01\0\0\0\0\0\0\x63\x1a\xd8\xff\0\0\0\0\x7b\x1a\xd0\
> -\xff\0\0\0\0\x7b\x1a\xc8\xff\0\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\xc8\xff\
> -\xff\xff\x79\xa1\x38\xff\0\0\0\0\xb7\x02\0\0\0\0\0\0\xb7\x04\0\0\x14\0\0\0\xb7\
>

Re: [PATCH v9 16/20] virtio-net: Do not write hashes to peer buffer

2024-04-08 Thread Yuri Benditovich

On Mon, Apr 8, 2024 at 10:57 AM Akihiko Odaki  wrote:
>
> On 2024/04/08 16:54, Yuri Benditovich wrote:
> > On Mon, Apr 8, 2024 at 10:42 AM Akihiko Odaki  
> > wrote:
> >>
> >> On 2024/04/08 16:40, Yuri Benditovich wrote:
> >>> On Mon, Apr 8, 2024 at 4:30 AM Akihiko Odaki  
> >>> wrote:
> >>>>
> >>>> On 2024/04/08 7:09, Yuri Benditovich wrote:
> >>>>> On Wed, Apr 3, 2024 at 2:12 PM Akihiko Odaki  
> >>>>> wrote:
> >>>>>>
> >>>>>> The peer buffer is qualified with const and not meant to be modified.
> >>>>>
> >>>>> IMHO, this buffer is not so 'const' (although the prototype states so),
> >>>>> it is allocated in net.c
> >>>>> btw, another procedure in this file also modifies the buffer
> >>>>> (work_around_broken_dhclient)
> >>>>
> >>>> Right but it has a FIXME comment.
> >>>>
> >>>>>
> >>>>>> It also prevents enabling VIRTIO_NET_F_HASH_REPORT for peers without
> >>>>>> virtio-net header support.
> >>>>>
> >>>>> Does it mean _this commit_ prevents enabling VIRTIO_NET_F_HASH_REPORT
> >>>>> for peers without
> >>>>> virtio-net header support? Where?
> >>>>
> >>>> No, but I meant that this patch fixes such a problem.
> >>>
> >>> No, it does not. Such a problem does not exist in the master, the
> >>> hash_report feature
> >>> is silently dropped in such case:
> >>> https://github.com/qemu/qemu/blob/master/hw/net/virtio-net.c#L816
> >>
> >> Well, silently dropping VIRTIO_NET_F_HASH_REPORT is not different from
> >> preventing enabling VIRTIO_NET_F_HASH_REPORT, is it?
> >>
> > But how is your patch involved in it? Should this line be removed from
> > the commit message?
>
> In the master, VIRTIO_NET_F_HASH_REPORT is silently dropped, but this
> patch will change to work without dropping it, which is worth to mention.
After applying this series of patches, VIRTIO_NET_F_HASH_REPORT is
dropped _the same way_ as in the master.
>
> Regards,
> Akihiko Odaki

Re: [PATCH v9 16/20] virtio-net: Do not write hashes to peer buffer

2024-04-08 Thread Yuri Benditovich

On Mon, Apr 8, 2024 at 10:42 AM Akihiko Odaki  wrote:
>
> On 2024/04/08 16:40, Yuri Benditovich wrote:
> > On Mon, Apr 8, 2024 at 4:30 AM Akihiko Odaki  
> > wrote:
> >>
> >> On 2024/04/08 7:09, Yuri Benditovich wrote:
> >>> On Wed, Apr 3, 2024 at 2:12 PM Akihiko Odaki  
> >>> wrote:
> >>>>
> >>>> The peer buffer is qualified with const and not meant to be modified.
> >>>
> >>> IMHO, this buffer is not so 'const' (although the prototype states so),
> >>> it is allocated in net.c
> >>> btw, another procedure in this file also modifies the buffer
> >>> (work_around_broken_dhclient)
> >>
> >> Right but it has a FIXME comment.
> >>
> >>>
> >>>> It also prevents enabling VIRTIO_NET_F_HASH_REPORT for peers without
> >>>> virtio-net header support.
> >>>
> >>> Does it mean _this commit_ prevents enabling VIRTIO_NET_F_HASH_REPORT
> >>> for peers without
> >>> virtio-net header support? Where?
> >>
> >> No, but I meant that this patch fixes such a problem.
> >
> > No, it does not. Such a problem does not exist in the master, the
> > hash_report feature
> > is silently dropped in such case:
> > https://github.com/qemu/qemu/blob/master/hw/net/virtio-net.c#L816
>
> Well, silently dropping VIRTIO_NET_F_HASH_REPORT is not different from
> preventing enabling VIRTIO_NET_F_HASH_REPORT, is it?
>
But how is your patch involved in it? Should this line be removed from
the commit message?


> Regards,
> Akihiko Odaki

Re: [PATCH v9 16/20] virtio-net: Do not write hashes to peer buffer

2024-04-08 Thread Yuri Benditovich

On Mon, Apr 8, 2024 at 4:30 AM Akihiko Odaki  wrote:
>
> On 2024/04/08 7:09, Yuri Benditovich wrote:
> > On Wed, Apr 3, 2024 at 2:12 PM Akihiko Odaki  
> > wrote:
> >>
> >> The peer buffer is qualified with const and not meant to be modified.
> >
> > IMHO, this buffer is not so 'const' (although the prototype states so),
> > it is allocated in net.c
> > btw, another procedure in this file also modifies the buffer
> > (work_around_broken_dhclient)
>
> Right but it has a FIXME comment.
>
> >
> >> It also prevents enabling VIRTIO_NET_F_HASH_REPORT for peers without
> >> virtio-net header support.
> >
> > Does it mean _this commit_ prevents enabling VIRTIO_NET_F_HASH_REPORT
> > for peers without
> > virtio-net header support? Where?
>
> No, but I meant that this patch fixes such a problem.

No, it does not. Such a problem does not exist in the master, the
hash_report feature
is silently dropped in such case:
https://github.com/qemu/qemu/blob/master/hw/net/virtio-net.c#L816

>
> Regards,
> Akihiko Odaki

Re: [PATCH v9 16/20] virtio-net: Do not write hashes to peer buffer

2024-04-07 Thread Yuri Benditovich

On Wed, Apr 3, 2024 at 2:12 PM Akihiko Odaki  wrote:
>
> The peer buffer is qualified with const and not meant to be modified.

IMHO, this buffer is not so 'const' (although the prototype states so),
it is allocated in net.c
btw, another procedure in this file also modifies the buffer
(work_around_broken_dhclient)

> It also prevents enabling VIRTIO_NET_F_HASH_REPORT for peers without
> virtio-net header support.

Does it mean _this commit_ prevents enabling VIRTIO_NET_F_HASH_REPORT
for peers without
virtio-net header support? Where?

> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/virtio-net.c | 36 +---
>  1 file changed, 17 insertions(+), 19 deletions(-)
>
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index 2de073ce18fd..ff1884564d0d 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -1823,16 +1823,9 @@ static uint8_t virtio_net_get_hash_type(bool hasip4,
>  return 0xff;
>  }
>
> -static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
> -   uint32_t hash)
> -{
> -struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
> -hdr->hash_value = hash;
> -hdr->hash_report = report;
> -}
> -
>  static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
> -  size_t size)
> +  size_t size,
> +  struct virtio_net_hdr_v1_hash *hdr)
>  {
>  VirtIONet *n = qemu_get_nic_opaque(nc);
>  unsigned int index = nc->queue_index, new_index = index;
> @@ -1863,7 +1856,8 @@ static int virtio_net_process_rss(NetClientState *nc, 
> const uint8_t *buf,
>   n->rss_data.hash_types);
>  if (net_hash_type > NetPktRssIpV6UdpEx) {
>  if (n->rss_data.populate_hash) {
> -virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
> +hdr->hash_value = VIRTIO_NET_HASH_REPORT_NONE;
> +hdr->hash_report = 0;
>  }
>  return n->rss_data.redirect ? n->rss_data.default_queue : -1;
>  }
> @@ -1871,7 +1865,8 @@ static int virtio_net_process_rss(NetClientState *nc, 
> const uint8_t *buf,
>  hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
>
>  if (n->rss_data.populate_hash) {
> -virtio_set_packet_hash(buf, reports[net_hash_type], hash);
> +hdr->hash_value = hash;
> +hdr->hash_report = reports[net_hash_type];
>  }
>
>  if (n->rss_data.redirect) {
> @@ -1891,7 +1886,7 @@ static ssize_t virtio_net_receive_rcu(NetClientState 
> *nc, const uint8_t *buf,
>  VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
>  size_t lens[VIRTQUEUE_MAX_SIZE];
>  struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
> -struct virtio_net_hdr_mrg_rxbuf mhdr;
> +struct virtio_net_hdr_v1_hash extra_hdr;
>  unsigned mhdr_cnt = 0;
>  size_t offset, i, guest_offset, j;
>  ssize_t err;
> @@ -1901,7 +1896,7 @@ static ssize_t virtio_net_receive_rcu(NetClientState 
> *nc, const uint8_t *buf,
>  }
>
>  if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
> -int index = virtio_net_process_rss(nc, buf, size);
> +int index = virtio_net_process_rss(nc, buf, size, &extra_hdr);
>  if (index >= 0) {
>  NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
>  return virtio_net_receive_rcu(nc2, buf, size, true);
> @@ -1961,15 +1956,17 @@ static ssize_t virtio_net_receive_rcu(NetClientState 
> *nc, const uint8_t *buf,
>  if (n->mergeable_rx_bufs) {
>  mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
>  sg, elem->in_num,
> -offsetof(typeof(mhdr), num_buffers),
> -sizeof(mhdr.num_buffers));
> +offsetof(typeof(extra_hdr), 
> hdr.num_buffers),
> +sizeof(extra_hdr.hdr.num_buffers));
>  }
>
>  receive_header(n, sg, elem->in_num, buf, size);
>  if (n->rss_data.populate_hash) {
> -offset = sizeof(mhdr);
> +offset = offsetof(typeof(extra_hdr), hash_value);
>  iov_from_buf(sg, elem->in_num, offset,
> - buf + offset, n->host_hdr_len - sizeof(mhdr));
> + (char *)&extra_hdr + offset,
> + sizeof(extra_hdr.hash_value) +
> + sizeof(extra_hdr.hash_report));
>  }
>  offset = n->host_hdr_len;
>  total += n->guest_hdr_len;
> @@ -2015,10 +2012,11 @@ static ssize_t virtio_net_receive_rcu(NetClientState 
> *nc, const uint8_t *buf,
>  }
>
>  if (mhdr_cnt) {
> -virtio_stw_p(vdev, &mhdr.num_buffers, i);
> +virtio_stw_p(vdev, &extra_hdr.hdr.num_buffers, i);
>  iov_from_buf(mhdr_sg,

Re: [PATCH v9 13/20] virtio-net: Return an error when vhost cannot enable RSS

2024-04-07 Thread Yuri Benditovich

On Wed, Apr 3, 2024 at 2:11 PM Akihiko Odaki  wrote:
>
> vhost requires eBPF for RSS. When eBPF is not available, virtio-net
> implicitly disables RSS even if the user explicitly requests it. Return
> an error instead of implicitly disabling RSS if RSS is requested but not
> available.
>
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/virtio-net.c | 97 
> ++---
>  1 file changed, 48 insertions(+), 49 deletions(-)
>
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index 61b49e335dea..3d53eba88cfc 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -793,9 +793,6 @@ static uint64_t virtio_net_get_features(VirtIODevice 
> *vdev, uint64_t features,
>  return features;
>  }
>
> -if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
> -virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
> -}
>  features = vhost_net_get_features(get_vhost_net(nc->peer), features);
>  vdev->backend_features = features;
>
> @@ -3591,6 +3588,50 @@ static bool 
> failover_hide_primary_device(DeviceListener *listener,
>  return qatomic_read(&n->failover_primary_hidden);
>  }
>
> +static void virtio_net_device_unrealize(DeviceState *dev)
> +{
> +VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +VirtIONet *n = VIRTIO_NET(dev);
> +int i, max_queue_pairs;
> +
> +if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
> +virtio_net_unload_ebpf(n);
> +}
> +
> +/* This will stop vhost backend if appropriate. */
> +virtio_net_set_status(vdev, 0);
> +
> +g_free(n->netclient_name);
> +n->netclient_name = NULL;
> +g_free(n->netclient_type);
> +n->netclient_type = NULL;
> +
> +g_free(n->mac_table.macs);
> +g_free(n->vlans);
> +
> +if (n->failover) {
> +qobject_unref(n->primary_opts);
> +device_listener_unregister(&n->primary_listener);
> +migration_remove_notifier(&n->migration_state);
> +} else {
> +assert(n->primary_opts == NULL);
> +}
> +
> +max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
> +for (i = 0; i < max_queue_pairs; i++) {
> +virtio_net_del_queue(n, i);
> +}
> +/* delete also control vq */
> +virtio_del_queue(vdev, max_queue_pairs * 2);
> +qemu_announce_timer_del(&n->announce_timer, false);
> +g_free(n->vqs);
> +qemu_del_nic(n->nic);
> +virtio_net_rsc_cleanup(n);
> +g_free(n->rss_data.indirections_table);
> +net_rx_pkt_uninit(n->rx_pkt);
> +virtio_cleanup(vdev);
> +}
> +
>  static void virtio_net_device_realize(DeviceState *dev, Error **errp)
>  {
>  VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> @@ -3760,53 +3801,11 @@ static void virtio_net_device_realize(DeviceState 
> *dev, Error **errp)
>
>  net_rx_pkt_init(&n->rx_pkt);
>
> -if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
> -virtio_net_load_ebpf(n);
> -}
> -}
> -
> -static void virtio_net_device_unrealize(DeviceState *dev)
> -{
> -VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> -VirtIONet *n = VIRTIO_NET(dev);
> -int i, max_queue_pairs;
> -
> -if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
> -virtio_net_unload_ebpf(n);
> +if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS) &&

I disagree with this change of qemu behavior.
From my point of view:
- this is not a major problem and it should not be a reason to stop VM execution
- it is enough to disable the RSS feature and continue working. Depending on
  other qemu parameters (number of queues, number of cpus) this might be just
  suboptimal, might be a minor problem, or might not be a problem at all
- this change makes rss the _only_ feature whose absence breaks the VM start;
  _all_ other features are dropped silently and only rss is not. Why??
- the series has the title 'Fixes and improvements'. This is not a fix and not an
  improvement; this is a significant behavioral change that should be discussed in
  light of future plans regarding rss
- I suggest removing this change from the series, submitting it separately
  and discussing it from all sides




> +!virtio_net_load_ebpf(n) && get_vhost_net(nc->peer)) {
> +virtio_net_device_unrealize(dev);
> +error_setg(errp, "Can't load eBPF RSS for vhost");
>  }
> -
> -/* This will stop vhost backend if appropriate. */
> -virtio_net_set_status(vdev, 0);
> -
> -g_free(n->netclient_name);
> -n->netclient_name = NULL;
> -g_free(n->netclient_type);
> -n->netclient_type = NULL;
> -
> -g_free(n->mac_table.macs);
> -g_free(n->vlans);
> -
> -if (n->failover) {
> -qobject_unref(n->primary_opts);
> -device_listener_unregister(&n->primary_listener);
> -migration_remove_notifier(&n->migration_state);
> -} else {
> -assert(n->primary_opts == NULL);
> -}
> -
> -max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
> -for (i = 0; i < max_queue_pairs; i++) {
> -

Re: [PATCH v2 2/2] ebpf: Fix indirections table setting

2024-03-28 Thread Yuri Benditovich

Hi Andrew,
Can you please check the indirection table copy and ack the patch
if the fix is correct?

Thanks,
Yuri

On Wed, Mar 27, 2024 at 4:05 AM Akihiko Odaki  wrote:
>
> The kernel documentation says:
> > The value stored can be of any size, however, all array elements are
> > aligned to 8 bytes.
> https://www.kernel.org/doc/html/v6.8/bpf/map_array.html
>
> Fixes: 333b3e5fab75 ("ebpf: Added eBPF map update through mmap.")
> Signed-off-by: Akihiko Odaki 
> ---
>  ebpf/ebpf_rss.c | 9 +++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c
> index 2e506f974357..d102f3dd0929 100644
> --- a/ebpf/ebpf_rss.c
> +++ b/ebpf/ebpf_rss.c
> @@ -185,13 +185,18 @@ static bool ebpf_rss_set_indirections_table(struct 
> EBPFRSSContext *ctx,
>  uint16_t *indirections_table,
>  size_t len)
>  {
> +char *cursor = ctx->mmap_indirections_table;
> +
>  if (!ebpf_rss_is_loaded(ctx) || indirections_table == NULL ||
> len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
>  return false;
>  }
>
> -memcpy(ctx->mmap_indirections_table, indirections_table,
> -sizeof(*indirections_table) * len);
> +for (size_t i = 0; i < len; i++) {
> +*(uint16_t *)cursor = indirections_table[i];
> +cursor += 8;
> +}
> +
>  return true;
>  }
>
>
> --
> 2.44.0
>

Re: [PATCH] virtio-net: correctly copy vnet header when flushing TX

2024-01-02 Thread Yuri Benditovich

I agree, thank you.

Where is this CVE-2023-6693 available?

Thanks,
Yuri

On Tue, Jan 2, 2024 at 5:29 AM Jason Wang  wrote:

> When HASH_REPORT is negotiated, the guest_hdr_len might be larger than
> the size of the mergeable rx buffer header. Using
> virtio_net_hdr_mrg_rxbuf during the header swap might lead a stack
> overflow in this case. Fixing this by using virtio_net_hdr_v1_hash
> instead.
>
> Reported-by: Xiao Lei 
> Cc: Yuri Benditovich 
> Cc: qemu-sta...@nongnu.org
> Cc: Mauro Matteo Cascella 
> Fixes: CVE-2023-6693
> Fixes: e22f0603fb2f ("virtio-net: reference implementation of hash report")
> Signed-off-by: Jason Wang 
> ---
>  hw/net/virtio-net.c | 13 +
>  1 file changed, 9 insertions(+), 4 deletions(-)
>
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index 80c56f0cfc..73024babd4 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -674,6 +674,11 @@ static void virtio_net_set_mrg_rx_bufs(VirtIONet *n,
> int mergeable_rx_bufs,
>
>  n->mergeable_rx_bufs = mergeable_rx_bufs;
>
> +/*
> + * Note: when extending the vnet header, please make sure to
> + * change the vnet header copying logic in virtio_net_flush_tx()
> + * as well.
> + */
>  if (version_1) {
>  n->guest_hdr_len = hash_report ?
>  sizeof(struct virtio_net_hdr_v1_hash) :
> @@ -2693,7 +2698,7 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
>  ssize_t ret;
>  unsigned int out_num;
>  struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1],
> *out_sg;
> -struct virtio_net_hdr_mrg_rxbuf mhdr;
> +struct virtio_net_hdr_v1_hash vhdr;
>
>  elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
>  if (!elem) {
> @@ -2710,7 +2715,7 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
>  }
>
>  if (n->has_vnet_hdr) {
> -if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
> +if (iov_to_buf(out_sg, out_num, 0, &vhdr, n->guest_hdr_len) <
>  n->guest_hdr_len) {
>  virtio_error(vdev, "virtio-net header incorrect");
>  virtqueue_detach_element(q->tx_vq, elem, 0);
> @@ -2718,8 +2723,8 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
>  return -EINVAL;
>  }
>  if (n->needs_vnet_hdr_swap) {
> -virtio_net_hdr_swap(vdev, (void *) &mhdr);
> -sg2[0].iov_base = &mhdr;
> +virtio_net_hdr_swap(vdev, (void *) &vhdr);
> +sg2[0].iov_base = &vhdr;
>  sg2[0].iov_len = n->guest_hdr_len;
>  out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
> out_sg, out_num,
> --
> 2.42.0
>
>

Re: [PATCH v7 4/5] qmp: Added new command to retrieve eBPF blob.

2023-12-18 Thread Yuri Benditovich

On Mon, Dec 18, 2023 at 2:54 PM Markus Armbruster  wrote:

> Uh, I missed this one, my apologies!  Let me have a quick look...
>
> Commit message style nitpick:
>
> qmp: Add command to retrieve eBPF blob
>
> or
>
> qmp: New command to retrieve eBPF blob
>
> It's a title, not a sentence.
>
> Andrew Melnychenko  writes:
>
> > Now, the binary objects may be retrieved by id.
> > It would require for future qmp commands that may require specific
> > eBPF blob.
> >
> > Added command "request-ebpf". This command returns
> > eBPF program encoded base64. The program taken from the
> > skeleton and essentially is an ELF object that can be
> > loaded in the future with libbpf.
> >
> > The reason to use the command to provide the eBPF object
> > instead of a separate artifact was to avoid issues related
> > to finding the eBPF itself. eBPF object is an ELF binary
> > that contains the eBPF program and eBPF map description(BTF).
> > Overall, eBPF object should contain the program and enough
> > metadata to create/load eBPF with libbpf. As the eBPF
> > maps/program should correspond to QEMU, the eBPF can't
> > be used from different QEMU build.
> >
> > The first solution was a helper that comes with QEMU
> > and loads appropriate eBPF objects. And the issue is
> > to find a proper helper if the system has several
> > different QEMUs installed and/or built from the source,
> > which helpers may not be compatible.
> >
> > Another issue is QEMU updating while there is a running
> > QEMU instance. With an updated helper, it may not be
> > possible to hotplug virtio-net device to the already
> > running QEMU. Overall, requesting the eBPF object from
> > QEMU itself solves possible failures with acceptable effort.
> >
> > Links:
> > [PATCH 3/5] qmp: Added the helper stamp check.
> > https://lore.kernel.org/all/20230219162100.174318-4-and...@daynix.com/
> >
> > Signed-off-by: Andrew Melnychenko 
>
> [...]
>
> > diff --git a/qapi/ebpf.json b/qapi/ebpf.json
> > new file mode 100644
> > index 00..ba78407148
> > --- /dev/null
> > +++ b/qapi/ebpf.json
> > @@ -0,0 +1,66 @@
> > +# -*- Mode: Python -*-
> > +# vim: filetype=python
> > +#
> > +# This work is licensed under the terms of the GNU GPL, version 2 or
> later.
> > +# See the COPYING file in the top-level directory.
> > +
> > +##
> > +# = eBPF Objects
> > +#
> > +# eBPF object is an ELF binary that contains the eBPF
> > +# program and eBPF map description(BTF). Overall, eBPF
> > +# object should contain the program and enough metadata
> > +# to create/load eBPF with libbpf. As the eBPF maps/program
> > +# should correspond to QEMU, the eBPF can't be used from
> > +# different QEMU build.
>
> An introduction, nice!
>
> Two spaces between sentences for consistency, please.
>
> Wrap comment lines at column 70, like this:
>
># eBPF object is an ELF binary that contains the eBPF program and eBPF
># map description(BTF).  Overall, eBPF object should contain the
># program and enough metadata to create/load eBPF with libbpf.  As the
># eBPF maps/program should correspond to QEMU, the eBPF can't be used
># from different QEMU build.
>
> > +#
> > +# Currently, there is a possible eBPF for receive-side scaling (RSS).
> > +#
> > +##
> > +
> > +##
> > +# @EbpfObject:
> > +#
> > +# An eBPF ELF object.
> > +#
> > +# @object: the eBPF object encoded in base64
> > +#
> > +# Since: 8.2
>
> Won't make 8.2, so bump to 9.0.  More of the same below, not noting it
> again.
>

Will there be 8.3?


>
> > +##
> > +{ 'struct': 'EbpfObject',
> > +  'data': {'object': 'str'},
> > +  'if': 'CONFIG_EBPF' }
> > +
> > +##
> > +# @EbpfProgramID:
> > +#
> > +# The eBPF programs that can be gotten with request-ebpf.
> > +#
> > +# @rss: Receive side scaling, technology that allows steering traffic
> > +# between queues by calculation hash.  Users may set up
> > +# indirection table and hash/packet types configurations.  Used
> > +# with virtio-net.
> > +#
> > +# Since: 8.2
> > +##
> > +{ 'enum': 'EbpfProgramID',
> > +  'if': 'CONFIG_EBPF',
> > +  'data': [ { 'name': 'rss' } ] }
> > +
> > +##
> > +# @request-ebpf:
> > +#
> > +# Retrieve an eBPF object that can be loaded with libbpf.  Management
> > +# applications (g.e. libvirt) may load it and pass file descriptors to
> > +# QEMU, so they can run running QEMU without BPF capabilities.
> > +#
> > +# @id: The ID of the program to return.
> > +#
> > +# Returns: eBPF object encoded in base64.
> > +#
> > +# Since: 8.2
> > +##
> > +{ 'command': 'request-ebpf',
> > +  'data': { 'id': 'EbpfProgramID' },
> > +  'returns': 'EbpfObject',
> > +  'if': 'CONFIG_EBPF' }
> > diff --git a/qapi/meson.build b/qapi/meson.build
> > index 60a668b343..90047dae1c 100644
> > --- a/qapi/meson.build
> > +++ b/qapi/meson.build
> > @@ -33,6 +33,7 @@ qapi_all_modules = [
> >'crypto',
> >'cxl',
> >'dump',
> > +  'ebpf',
> >'error',
> >'introspect',
> >'job',
> > diff --git a/qapi/qapi-schema.json b/qapi/qapi-schema.json
>

Re: [PATCH v7 5/5] ebpf: Updated eBPF program and skeleton.

2023-12-14 Thread Yuri Benditovich

Hi Jason,
As we missed the timeframe of 8.2 anyway, please remove this v7 series from
the pull; we will send v8, in which we want to take into account most of
Akihiko's comments (especially DEFINE_PROP_ARRAY and naming of the
properties).

Thank you very much

Yuri

On Wed, Dec 13, 2023 at 9:23 AM Yuri Benditovich <
yuri.benditov...@daynix.com> wrote:

>
> On Tue, Dec 12, 2023 at 5:33 AM Jason Wang  wrote:
>
>> On Mon, Dec 11, 2023 at 7:51 PM Yuri Benditovich
>>  wrote:
>> >
>> > Hello Jason,
>> > Can you please let us know what happens with this series?
>>
>> It should be my bad, it is in V1 of the pull request but missed
>> accidentally in V2 of the pull.
>>
>> I've merged it here,
>>
>> https://gitlab.com/jasowang/qemu.git
>
>
> Yes, the merged tree is OK. I see you changed the target version to 8.3
>  It looks like no more changes required for the PULL,
>  Please let us know if something is needed.
>
> Thanks,
> Yuri
>
>
>
>>
>> Please check if it's correct.
>>
>> Thanks
>>
>> >
>> > Thanks
>> > Yuri
>> >
>> > On Fri, Sep 8, 2023 at 9:43 AM Jason Wang  wrote:
>> >>
>> >> On Mon, Sep 4, 2023 at 7:23 PM Andrew Melnichenko 
>> wrote:
>> >> >
>> >> > Hi Jason,
>> >> > According to our previous conversation, I've added checks to the
>> meson script.
>> >> > Please confirm that everything is correct
>> >>
>> >> I've queued this series.
>> >>
>> >> Thanks
>> >>
>>
>>

Re: [PATCH v7 5/5] ebpf: Updated eBPF program and skeleton.

2023-12-12 Thread Yuri Benditovich

On Tue, Dec 12, 2023 at 5:33 AM Jason Wang  wrote:

> On Mon, Dec 11, 2023 at 7:51 PM Yuri Benditovich
>  wrote:
> >
> > Hello Jason,
> > Can you please let us know what happens with this series?
>
> It should be my bad, it is in V1 of the pull request but missed
> accidentally in V2 of the pull.
>
> I've merged it here,
>
> https://gitlab.com/jasowang/qemu.git


Yes, the merged tree is OK. I see you changed the target version to 8.3.
It looks like no more changes are required for the PULL.
Please let us know if something is needed.

Thanks,
Yuri



>
> Please check if it's correct.
>
> Thanks
>
> >
> > Thanks
> > Yuri
> >
> > On Fri, Sep 8, 2023 at 9:43 AM Jason Wang  wrote:
> >>
> >> On Mon, Sep 4, 2023 at 7:23 PM Andrew Melnichenko 
> wrote:
> >> >
> >> > Hi Jason,
> >> > According to our previous conversation, I've added checks to the
> meson script.
> >> > Please confirm that everything is correct
> >>
> >> I've queued this series.
> >>
> >> Thanks
> >>
>
>

Re: [PATCH v8 00/19] virtio-net RSS/hash report fixes and improvements

2023-12-11 Thread Yuri Benditovich

I'm adding also Yan

On Mon, Dec 11, 2023 at 9:51 PM Yuri Benditovich <
yuri.benditov...@daynix.com> wrote:

> Hi Michael,
> Sure, I've reviewed that also, there was a fruitful discussion
> till the series reached its final form.
> At the beginning of September we've got the response from Jason that the
> series is queued upstream so we were calm and switched to libvirt part ))
>
> Seems like a misunderstanding, let's wait for Jason response.
>
> Thanks,
> Yuri
>
>
>
>
> On Mon, Dec 11, 2023 at 5:43 PM Michael S. Tsirkin  wrote:
>
>> On Mon, Dec 11, 2023 at 02:34:56PM +0200, Yuri Benditovich wrote:
>> > https://lists.gnu.org/archive/html/qemu-devel/2023-08/msg05859.html
>>
>> It's from August, I think it's fair to say it's not going upstream
>> unless there's some activity. Yuri did you review that series then?
>> Care to ack?
>>
>> --
>> MST
>>
>>

Re: [PATCH v8 00/19] virtio-net RSS/hash report fixes and improvements

2023-12-11 Thread Yuri Benditovich

Hi Michael,
Sure, I've reviewed that also, there was a fruitful discussion
till the series reached its final form.
At the beginning of September we got the response from Jason that the
series was queued upstream, so we were calm and switched to the libvirt part ))

Seems like a misunderstanding, let's wait for Jason's response.

Thanks,
Yuri

On Mon, Dec 11, 2023 at 5:43 PM Michael S. Tsirkin  wrote:

> On Mon, Dec 11, 2023 at 02:34:56PM +0200, Yuri Benditovich wrote:
> > https://lists.gnu.org/archive/html/qemu-devel/2023-08/msg05859.html
>
> It's from August, I think it's fair to say it's not going upstream
> unless there's some activity. Yuri did you review that series then?
> Care to ack?
>
> --
> MST
>
>

Re: [PATCH v7 1/5] ebpf: Added eBPF map update through mmap.

2023-12-11 Thread Yuri Benditovich

Akihiko,
This series was already discussed several months ago.
I'd suggest postponing comments on it and resuming them after merging.

Thanks for understanding.
Yuri

On Mon, Dec 11, 2023 at 3:05 PM Akihiko Odaki 
wrote:

> On 2023/08/31 15:51, Andrew Melnychenko wrote:
> > Changed eBPF map updates through mmaped array.
> > Mmaped arrays provide direct access to map data.
> > It should omit using bpf_map_update_elem() call,
> > which may require capabilities that are not present.
> >
> > Signed-off-by: Andrew Melnychenko 
> > ---
> >   ebpf/ebpf_rss.c | 117 ++--
> >   ebpf/ebpf_rss.h |   5 +++
> >   2 files changed, 99 insertions(+), 23 deletions(-)
> >
> > diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c
> > index cee658c158..247f5eee1b 100644
> > --- a/ebpf/ebpf_rss.c
> > +++ b/ebpf/ebpf_rss.c
> > @@ -27,19 +27,83 @@ void ebpf_rss_init(struct EBPFRSSContext *ctx)
> >   {
> >   if (ctx != NULL) {
> >   ctx->obj = NULL;
> > +ctx->program_fd = -1;
> > +ctx->map_configuration = -1;
> > +ctx->map_toeplitz_key = -1;
> > +ctx->map_indirections_table = -1;
> > +
> > +ctx->mmap_configuration = NULL;
> > +ctx->mmap_toeplitz_key = NULL;
> > +ctx->mmap_indirections_table = NULL;
> >   }
> >   }
> >
> >   bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx)
> >   {
> > -return ctx != NULL && ctx->obj != NULL;
> > +return ctx != NULL && (ctx->obj != NULL || ctx->program_fd != -1);
> > +}
> > +
> > +static bool ebpf_rss_mmap(struct EBPFRSSContext *ctx)
> > +{
> > +if (!ebpf_rss_is_loaded(ctx)) {
> > +return false;
> > +}
> > +
> > +ctx->mmap_configuration = mmap(NULL, qemu_real_host_page_size(),
> > +   PROT_READ | PROT_WRITE, MAP_SHARED,
> > +   ctx->map_configuration, 0);
> > +if (ctx->mmap_configuration == MAP_FAILED) {
> > +trace_ebpf_error("eBPF RSS", "can not mmap eBPF configuration
> array");
> > +return false;
> > +}
> > +ctx->mmap_toeplitz_key = mmap(NULL, qemu_real_host_page_size(),
> > +   PROT_READ | PROT_WRITE, MAP_SHARED,
> > +   ctx->map_toeplitz_key, 0);
> > +if (ctx->mmap_toeplitz_key == MAP_FAILED) {
> > +trace_ebpf_error("eBPF RSS", "can not mmap eBPF toeplitz key");
> > +goto toeplitz_fail;
> > +}
> > +ctx->mmap_indirections_table = mmap(NULL,
> qemu_real_host_page_size(),
> > +   PROT_READ | PROT_WRITE, MAP_SHARED,
> > +   ctx->map_indirections_table, 0);
> > +if (ctx->mmap_indirections_table == MAP_FAILED) {
> > +trace_ebpf_error("eBPF RSS", "can not mmap eBPF indirection
> table");
> > +goto indirection_fail;
> > +}
> > +
> > +return true;
> > +
> > +indirection_fail:
> > +munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
> > +toeplitz_fail:
> > +munmap(ctx->mmap_configuration, qemu_real_host_page_size());
> > +
> > +ctx->mmap_configuration = NULL;
> > +ctx->mmap_toeplitz_key = NULL;
> > +ctx->mmap_indirections_table = NULL;
>
> What about:
>
>  > +indirection_fail:
>  > +munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
>  > +ctx->mmap_toeplitz_key = NULL;
>  > +toeplitz_fail:
>  > +munmap(ctx->mmap_configuration, qemu_real_host_page_size());
>  > +ctx->mmap_configuration = NULL;
>
> It will be clearer when the pointer becomes invalid this way.
>
> > +return false;
> > +}
> > +
> > +static void ebpf_rss_munmap(struct EBPFRSSContext *ctx)
> > +{
> > +if (!ebpf_rss_is_loaded(ctx)) {
> > +return;
> > +}
> > +
> > +munmap(ctx->mmap_indirections_table, qemu_real_host_page_size());
> > +munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
> > +munmap(ctx->mmap_configuration, qemu_real_host_page_size());
> > +
> > +ctx->mmap_configuration = NULL;
> > +ctx->mmap_toeplitz_key = NULL;
> > +ctx->mmap_indirections_table = NULL;
> >   }
> >
> >   bool ebpf_rss_load(struct EBPFRSSContext *ctx)
> >   {
> >   struct rss_bpf *rss_bpf_ctx;
> >
> > -if (ctx == NULL) {
> > +if (ctx == NULL || ebpf_rss_is_loaded(ctx)) {
> >   return false;
> >   }
>
> You can omit ctx == NULL just as you do for ebpf_rss_munmap().
>
> >
> > @@ -66,10 +130,18 @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx)
> >   ctx->map_toeplitz_key = bpf_map__fd(
> >   rss_bpf_ctx->maps.tap_rss_map_toeplitz_key);
> >
> > +if (!ebpf_rss_mmap(ctx)) {
> > +goto error;
> > +}
> > +
> >   return true;
> >   error:
> >   rss_bpf__destroy(rss_bpf_ctx);
> >   ctx->obj = NULL;
> > +ctx->program_fd = -1;
> > +ctx->map_configuration = -1;
> > +ctx->map_toeplitz_key = -1;
> > +ctx->map_indirections_table = -1;
> >
> >   return false;
> >   }
> > @@ -77,15

Re: [PATCH v8 00/19] virtio-net RSS/hash report fixes and improvements

2023-12-11 Thread Yuri Benditovich

https://lists.gnu.org/archive/html/qemu-devel/2023-08/msg05859.html

On Mon, Dec 11, 2023 at 2:01 PM Akihiko Odaki 
wrote:

> On 2023/12/11 20:54, Yuri Benditovich wrote:
> > People, I suggest to wait a little and understand what happens with the
> > previous series from Andrew Melnichenko (support for qemu under libvirt
> etc)
>
> Can you share the link to the series?
>
> > According to response from Jason from Sept 8 it was queued but I do not
> > see it in the master branch.
>
> I didn't see it either.
>
> Regards,
> Akihiko Odaki
>

Re: [PATCH v8 00/19] virtio-net RSS/hash report fixes and improvements

2023-12-11 Thread Yuri Benditovich

People, I suggest waiting a little to understand what is happening with the
previous series from Andrew Melnichenko (support for QEMU under libvirt, etc.).
According to Jason's response from Sept 8, it was queued, but I do not see
it in the master branch.

Thanks a lot.

On Sun, Dec 10, 2023 at 7:30 AM Akihiko Odaki 
wrote:

> This series contains fixes and improvements for virtio-net RSS and hash
> reporting feature.
>
> V7 -> V8:
>   Reset author email addresses.
>   Rebased.
>
> V6 -> V7:
>   Dropped patch "virtio-net: Do not clear VIRTIO_NET_F_HASH_REPORT".
>   Dropped the changes to remove packet flags.
>   Re-introduced tap_receive() and changed it to call tap_receive_iov().
>   Removed tap_get_vnet_hdr_len().
>   Fixed tap initialization not to call tap_fd_set_vnet_hdr_len() for tap
>   without virtio-net header.
>   Changed to call error_report() instead of warn_report() for
>   programming errors.
>
> V5 -> V6:
>   Corrected the message for patch "virtio-net: Return an error when vhost
>   cannot enable RSS".
>   Removed changes to introduce asserts from "virtio-net: Return an error
>   when vhost cannot enable RSS".
>   Reorganized patches "virtio-net: Return an error when vhost cannot
>   enable RSS" and "virtio-net: Do not clear VIRTIO_NET_F_RSS". This
>   version now contains patches "virtio-net: Return an error when vhost
>   cannot enable RSS" and "virtio-net: Enable software RSS".
>   Rebased.
>
> V4 -> V5:
>   Added patch "virtio-net: Do not write hashes to peer buffer".
>
> V3 -> V4:
>   Extract patches "tap: Remove tap_receive()" and  "net: Remove flag
>   propagation" from "net: Remove receive_raw()".
>   Added patch "virtio-net: Always set populate_hash".
>   Added patch "virtio-net: Do not clear VIRTIO_NET_F_HASH_REPORT".
>   Added patch "ebpf: Use standard section name".
>   Added patch "ebpf: Simplify error handling".
>   Added patch "ebpf: Return 0 when configuration fails".
>   Added patch "ebpf: Refactor tun_rss_steering_prog()".
>   Added patch "ebpf: Add a separate target for skeleton".
>
> V2 -> V3:
>   Added patch "tap: Remove tap_probe_vnet_hdr_len()".
>   Added patch "tap: Remove qemu_using_vnet_hdr()".
>   Added patch "net: Move virtio-net header length assertion".
>   Added patch "net: Remove receive_raw()".
>   Added patch "tap: Shrink zeroed virtio-net header".
>   Dropped patch "tap: Fix virtio-net header buffer size".
>
> V1 -> V2:
>   Added patch "ebpf: Fix RSS error handling".
>
> Signed-off-by: Akihiko Odaki 
> ---
> Akihiko Odaki (19):
>   tap: Remove tap_probe_vnet_hdr_len()
>   tap: Remove qemu_using_vnet_hdr()
>   net: Move virtio-net header length assertion
>   net: Remove receive_raw()
>   tap: Call tap_receive_iov() from tap_receive()
>   tap: Shrink zeroed virtio-net header
>   virtio-net: Copy header only when necessary
>   virtio-net: Disable RSS on reset
>   virtio-net: Unify the logic to update NIC state for RSS
>   virtio-net: Return an error when vhost cannot enable RSS
>   virtio-net: Report RSS warning at device realization
>   virtio-net: Always set populate_hash
>   virtio-net: Do not write hashes to peer buffer
>   ebpf: Fix RSS error handling
>   ebpf: Use standard section name
>   ebpf: Simplify error handling
>   ebpf: Return 0 when configuration fails
>   ebpf: Refactor tun_rss_steering_prog()
>   ebpf: Add a separate target for skeleton
>
>  ebpf/rss.bpf.skeleton.h  | 1557
> +++---
>  include/net/net.h|8 -
>  net/tap_int.h|1 -
>  ebpf/ebpf_rss.c  |   12 +-
>  hw/net/e1000e.c  |1 -
>  hw/net/igb.c |1 -
>  hw/net/net_tx_pkt.c  |4 +-
>  hw/net/virtio-net.c  |  308 +
>  hw/net/vmxnet3.c |2 -
>  net/dump.c   |4 +-
>  net/net.c|   47 +-
>  net/netmap.c |5 -
>  net/tap-bsd.c|5 -
>  net/tap-linux.c  |   20 -
>  net/tap-solaris.c|5 -
>  net/tap-stub.c   |5 -
>  net/tap.c|   77 +--
>  tools/ebpf/rss.bpf.c |   46 +-
>  tools/ebpf/Makefile.ebpf |   15 +-
>  19 files changed, 986 insertions(+), 1137 deletions(-)
> ---
> base-commit: 9c74490bff6c8886a922008d0c9ce6cae70dd17e
> change-id: 20231210-rss-e7c98e722253
>
> Best regards,
> --
> Akihiko Odaki 
>
>

Re: [PATCH v7 5/5] ebpf: Updated eBPF program and skeleton.

2023-12-11 Thread Yuri Benditovich

Hello Jason,
Can you please let us know what happens with this series?

Thanks
Yuri

On Fri, Sep 8, 2023 at 9:43 AM Jason Wang  wrote:

> On Mon, Sep 4, 2023 at 7:23 PM Andrew Melnichenko 
> wrote:
> >
> > Hi Jason,
> > According to our previous conversation, I've added checks to the meson
> script.
> > Please confirm that everything is correct
>
> I've queued this series.
>
> Thanks
>
>

Re: [PATCH v6 11/21] virtio-net: Return an error when vhost cannot enable RSS

2023-11-14 Thread Yuri Benditovich

On Tue, Nov 14, 2023 at 9:03 AM Akihiko Odaki 
wrote:

> On 2023/11/14 2:26, Yuri Benditovich wrote:
> >
> >
> > On Mon, Nov 13, 2023 at 2:44 PM Akihiko Odaki  > <mailto:akihiko.od...@daynix.com>> wrote:
> >
> > On 2023/11/13 20:44, Yuri Benditovich wrote:
> >  >
> >  >
> >  > On Sat, Nov 11, 2023 at 5:28 PM Akihiko Odaki
> > mailto:akihiko.od...@daynix.com>
> >  > <mailto:akihiko.od...@daynix.com
> > <mailto:akihiko.od...@daynix.com>>> wrote:
> >  >
> >  > On 2023/11/03 22:14, Yuri Benditovich wrote:
> >  >  >
> >  >  >
> >  >  > On Fri, Nov 3, 2023 at 11:55 AM Akihiko Odaki
> >  > mailto:akihiko.od...@daynix.com>
> > <mailto:akihiko.od...@daynix.com <mailto:akihiko.od...@daynix.com>>
> >  >  > <mailto:akihiko.od...@daynix.com
> > <mailto:akihiko.od...@daynix.com>
> >  > <mailto:akihiko.od...@daynix.com
> > <mailto:akihiko.od...@daynix.com>>>> wrote:
> >  >  >
> >  >  > On 2023/11/03 18:35, Yuri Benditovich wrote:
> >  >  >  >
> >  >  >  >
> >  >  >  > On Thu, Nov 2, 2023 at 4:56 PM Akihiko Odaki
> >  >  >  > <mailto:akihiko.od...@daynix.com> <mailto:akihiko.od...@daynix.com
> > <mailto:akihiko.od...@daynix.com>>
> >  > <mailto:akihiko.od...@daynix.com
> > <mailto:akihiko.od...@daynix.com> <mailto:akihiko.od...@daynix.com
> > <mailto:akihiko.od...@daynix.com>>>
> >  >  >  > <mailto:akihiko.od...@daynix.com
> > <mailto:akihiko.od...@daynix.com>
> >  > <mailto:akihiko.od...@daynix.com
> > <mailto:akihiko.od...@daynix.com>>
> >  >  > <mailto:akihiko.od...@daynix.com
> > <mailto:akihiko.od...@daynix.com>
> >  > <mailto:akihiko.od...@daynix.com
> > <mailto:akihiko.od...@daynix.com>>>>> wrote:
> >  >  >  >
> >  >  >  > On 2023/11/02 19:20, Yuri Benditovich wrote:
> >  >  >  >  >
> >  >  >  >  >
> >  >  >  >  > On Thu, Nov 2, 2023 at 11:33 AM Michael S.
> > Tsirkin
> >  >  >  > mailto:m...@redhat.com>
> > <mailto:m...@redhat.com <mailto:m...@redhat.com>>
> >  > <mailto:m...@redhat.com <mailto:m...@redhat.com>
> > <mailto:m...@redhat.com <mailto:m...@redhat.com>>>
> >  >  > <mailto:m...@redhat.com <mailto:m...@redhat.com>
> > <mailto:m...@redhat.com <mailto:m...@redhat.com>>
> >  > <mailto:m...@redhat.com <mailto:m...@redhat.com>
> > <mailto:m...@redhat.com <mailto:m...@redhat.com>>>>
> >  >  >  >  > <mailto:m...@redhat.com
> > <mailto:m...@redhat.com> <mailto:m...@redhat.com  m...@redhat.com>>
> >  > <mailto:m...@redhat.com <mailto:m...@redhat.com>
> > <mailto:m...@redhat.com <mailto:m...@redhat.com>>>
> >  >  > <mailto:m...@redhat.com <mailto:m...@redhat.com>
> > <mailto:m...@redhat.com <mailto:m...@redhat.com>>
> >  > <mailto:m...@redhat.com <mailto:m...@redhat.com>
> > <mailto:m...@redhat.com <mailto:m...@redhat.com>>>>>> wrote:
> >  >  >  >  >
> >  >  >  >  > On Thu, Nov 02, 2023 at 11:09:27AM
> > +0200, Yuri
> >  >  > Benditovich wrote:
> >  >  >  >  >  > Probably we mix two different patches
> > in this
> >  >  > discussion.
> >  >  >  >  >  > Focusing on the patch in the e-mail
> > header:
> >  >  >  >  >  >
> >  >  >  >  >  > IMO it is not acceptable to fail QEMU
> run
> >  > for one
> >  >  > feature
> >  >  >  > that we
> >  >  >  >  > can't make
> >  &

Re: [PATCH v6 11/21] virtio-net: Return an error when vhost cannot enable RSS

2023-11-13 Thread Yuri Benditovich

On Mon, Nov 13, 2023 at 2:44 PM Akihiko Odaki 
wrote:

> On 2023/11/13 20:44, Yuri Benditovich wrote:
> >
> >
> > On Sat, Nov 11, 2023 at 5:28 PM Akihiko Odaki  > <mailto:akihiko.od...@daynix.com>> wrote:
> >
> > On 2023/11/03 22:14, Yuri Benditovich wrote:
> >  >
> >  >
> >  > On Fri, Nov 3, 2023 at 11:55 AM Akihiko Odaki
> > mailto:akihiko.od...@daynix.com>
> >  > <mailto:akihiko.od...@daynix.com
> > <mailto:akihiko.od...@daynix.com>>> wrote:
> >  >
> >  > On 2023/11/03 18:35, Yuri Benditovich wrote:
> >  >  >
> >  >  >
> >  >  > On Thu, Nov 2, 2023 at 4:56 PM Akihiko Odaki
> >  > mailto:akihiko.od...@daynix.com>
> > <mailto:akihiko.od...@daynix.com <mailto:akihiko.od...@daynix.com>>
> >  >  > <mailto:akihiko.od...@daynix.com
> > <mailto:akihiko.od...@daynix.com>
> >  > <mailto:akihiko.od...@daynix.com
> > <mailto:akihiko.od...@daynix.com>>>> wrote:
> >  >  >
> >  >  > On 2023/11/02 19:20, Yuri Benditovich wrote:
> >  >  >  >
> >  >  >  >
> >  >  >  > On Thu, Nov 2, 2023 at 11:33 AM Michael S. Tsirkin
> >  >  > mailto:m...@redhat.com>
> > <mailto:m...@redhat.com <mailto:m...@redhat.com>>
> >  > <mailto:m...@redhat.com <mailto:m...@redhat.com>
> > <mailto:m...@redhat.com <mailto:m...@redhat.com>>>
> >  >  >  > <mailto:m...@redhat.com <mailto:m...@redhat.com>
> > <mailto:m...@redhat.com <mailto:m...@redhat.com>>
> >  > <mailto:m...@redhat.com <mailto:m...@redhat.com>
> > <mailto:m...@redhat.com <mailto:m...@redhat.com>>>>> wrote:
> >  >  >  >
> >  >  >  > On Thu, Nov 02, 2023 at 11:09:27AM +0200, Yuri
> >  > Benditovich wrote:
> >  >  >  >  > Probably we mix two different patches in this
> >  > discussion.
> >  >  >  >  > Focusing on the patch in the e-mail header:
> >  >  >  >  >
> >  >  >  >  > IMO it is not acceptable to fail QEMU run
> > for one
> >  > feature
> >  >  > that we
> >  >  >  > can't make
> >  >  >  >  > active when we silently drop all other
> > features in
> >  > such a
> >  >  > case.
> >  >  >  >
> >  >  >  > If the feature is off by default then it seems
> more
> >  > reasonable
> >  >  >  > and silent masking can be seen as a bug.
> >  >  >  > Most virtio features are on by default this is
> > why it's
> >  >  >  > reasonable to mask them.
> >  >  >  >
> >  >  >  >
> >  >  >  > If we are talking about RSS: setting it initially
> > off is the
> >  >  > development
> >  >  >  > time decision.
> >  >  >  > When it will be completely stable there is no
> reason to
> >  > keep it
> >  >  > off by
> >  >  >  > default, so this is more a question of time and of a
> >  > readiness of
> >  >  > libvirt.
> >  >  >
> >  >  > It is not ok to make "on" the default; that will
> > enable RSS
> >  > even when
> >  >  > eBPF steering support is not present and can result in
> >  > performance
> >  >  > degradation.
> >  >  >
> >  >  >
> >  >  > Exactly as it is today - with vhost=on the host does not
> > suggest RSS
> >  >  > without  eBPF.
> >  >  > I do not understand what you call "performance
> > degradation", can you
> >  >  > describe the scenario?
> >  >
> >  > I was not clear, but I was talking about the case of
> > vhost=off

Re: [PATCH v6 11/21] virtio-net: Return an error when vhost cannot enable RSS

2023-11-13 Thread Yuri Benditovich

On Sat, Nov 11, 2023 at 5:28 PM Akihiko Odaki 
wrote:

> On 2023/11/03 22:14, Yuri Benditovich wrote:
> >
> >
> > On Fri, Nov 3, 2023 at 11:55 AM Akihiko Odaki  > <mailto:akihiko.od...@daynix.com>> wrote:
> >
> > On 2023/11/03 18:35, Yuri Benditovich wrote:
> >  >
> >  >
> >  > On Thu, Nov 2, 2023 at 4:56 PM Akihiko Odaki
> > mailto:akihiko.od...@daynix.com>
> >  > <mailto:akihiko.od...@daynix.com
> > <mailto:akihiko.od...@daynix.com>>> wrote:
> >  >
> >  > On 2023/11/02 19:20, Yuri Benditovich wrote:
> >  >  >
> >  >  >
> >  >  > On Thu, Nov 2, 2023 at 11:33 AM Michael S. Tsirkin
> >  > mailto:m...@redhat.com>
> > <mailto:m...@redhat.com <mailto:m...@redhat.com>>
> >  >  > <mailto:m...@redhat.com <mailto:m...@redhat.com>
> > <mailto:m...@redhat.com <mailto:m...@redhat.com>>>> wrote:
> >  >  >
> >  >  > On Thu, Nov 02, 2023 at 11:09:27AM +0200, Yuri
> > Benditovich wrote:
> >  >  >  > Probably we mix two different patches in this
> > discussion.
> >  >  >  > Focusing on the patch in the e-mail header:
> >  >  >  >
> >  >  >  > IMO it is not acceptable to fail QEMU run for one
> > feature
> >  > that we
> >  >  > can't make
> >  >  >  > active when we silently drop all other features in
> > such a
> >  > case.
> >  >  >
> >  >  > If the feature is off by default then it seems more
> > reasonable
> >  >  > and silent masking can be seen as a bug.
> >  >  > Most virtio features are on by default this is why it's
> >  >  > reasonable to mask them.
> >  >  >
> >  >  >
> >  >  > If we are talking about RSS: setting it initially off is
> the
> >  > development
> >  >  > time decision.
> >  >  > When it will be completely stable there is no reason to
> > keep it
> >  > off by
> >  >  > default, so this is more a question of time and of a
> > readiness of
> >  > libvirt.
> >  >
> >  > It is not ok to make "on" the default; that will enable RSS
> > even when
> >  > eBPF steering support is not present and can result in
> > performance
> >  > degradation.
> >  >
> >  >
> >  > Exactly as it is today - with vhost=on the host does not suggest
> RSS
> >  > without  eBPF.
> >  > I do not understand what you call "performance degradation", can
> you
> >  > describe the scenario?
> >
> > I was not clear, but I was talking about the case of vhost=off or
> peers
> > other than tap (e.g., user). rss=on employs in-qemu RSS, which incurs
> > overheads for such configurations.
> >
> >
> > So, vhost=off OR peers other than tap:
> >
> > In the case of peers other than tap (IMO) we're not talking about
> > performance at all.
> > Backends like "user" (without vnet_hdr) do not support _many_
> > performance-oriented features.
> > If RSS is somehow "supported" for such backends this is rather a
> > misunderstanding (IMO again).
>
> We do not need to ensure good performance when RSS is enabled by the
> guest for backends without eBPF steering program as you say. In-QEMU RSS
> is only useful for testing and not meant to improve the performance.
>
> However, if you set rss=on, QEMU will advertise the availability of RSS
> feature. The guest will have no mean to know if it's implemented in a
> way not performance-wise so it may decide to use the feature to improve
> the performance, which can result in performance degradation. Therefore,
> it's better not to set rss=on for such backends.
>

I still do not understand in which scenario you see or suspect the
mentioned "performance degradation".
We can discuss whether such a problem exists as soon as you explain it.

Re: [PATCH v6 15/21] virtio-net: Do not clear VIRTIO_NET_F_HASH_REPORT

2023-11-03 Thread Yuri Benditovich

On Mon, Oct 30, 2023 at 7:15 AM Akihiko Odaki 
wrote:

> virtio-net can report hash values even if the peer does not have a
> virtio-net header.
>
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/virtio-net.c | 2 --
>  1 file changed, 2 deletions(-)
>
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index e30105884c..bdb4579f98 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -774,8 +774,6 @@ static uint64_t virtio_net_get_features(VirtIODevice
> *vdev, uint64_t features,
>  virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
>  virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
>  virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
> -
> -virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
>  }
>
>
IMO, we should not enable any advanced features for backends without
vnet_hdr unless we have a strong reason to do so.
(HOST_TSO and GUEST_TSO are performance boosters, and they are not
supported without vnet_hdr.)
I'd rather also disable RSS under this "if".



>  if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
> --
> 2.42.0
>
>

Re: [PATCH v6 11/21] virtio-net: Return an error when vhost cannot enable RSS

2023-11-03 Thread Yuri Benditovich

On Fri, Nov 3, 2023 at 11:55 AM Akihiko Odaki 
wrote:

> On 2023/11/03 18:35, Yuri Benditovich wrote:
> >
> >
> > On Thu, Nov 2, 2023 at 4:56 PM Akihiko Odaki  > <mailto:akihiko.od...@daynix.com>> wrote:
> >
> > On 2023/11/02 19:20, Yuri Benditovich wrote:
> >  >
> >  >
> >  > On Thu, Nov 2, 2023 at 11:33 AM Michael S. Tsirkin
> > mailto:m...@redhat.com>
> >  > <mailto:m...@redhat.com <mailto:m...@redhat.com>>> wrote:
> >  >
> >  > On Thu, Nov 02, 2023 at 11:09:27AM +0200, Yuri Benditovich
> wrote:
> >  >  > Probably we mix two different patches in this discussion.
> >  >  > Focusing on the patch in the e-mail header:
> >  >  >
> >  >  > IMO it is not acceptable to fail QEMU run for one feature
> > that we
> >  > can't make
> >  >  > active when we silently drop all other features in such a
> > case.
> >  >
> >  > If the feature is off by default then it seems more reasonable
> >  > and silent masking can be seen as a bug.
> >  > Most virtio features are on by default this is why it's
> >  > reasonable to mask them.
> >  >
> >  >
> >  > If we are talking about RSS: setting it initially off is the
> > development
> >  > time decision.
> >  > When it will be completely stable there is no reason to keep it
> > off by
> >  > default, so this is more a question of time and of a readiness of
> > libvirt.
> >
> > It is not ok to make "on" the default; that will enable RSS even when
> > eBPF steering support is not present and can result in performance
> > degradation.
> >
> >
> > Exactly as it is today - with vhost=on the host does not suggest RSS
> > without  eBPF.
> > I do not understand what you call "performance degradation", can you
> > describe the scenario?
>
> I was not clear, but I was talking about the case of vhost=off or peers
> other than tap (e.g., user). rss=on employs in-qemu RSS, which incurs
> overheads for such configurations.
>

So, vhost=off OR peers other than tap:

In the case of peers other than tap (IMO) we're not talking about
performance at all.
Backends like "user" (without vnet_hdr) do not support _many_
performance-oriented features.
If RSS is somehow "supported" for such backends this is rather a
misunderstanding (IMO again).

In the case of tap with vhost=off the RSS support does not create any
performance degradation without eBPF.

Re: [PATCH v6 11/21] virtio-net: Return an error when vhost cannot enable RSS

2023-11-03 Thread Yuri Benditovich

On Thu, Nov 2, 2023 at 4:56 PM Akihiko Odaki 
wrote:

> On 2023/11/02 19:20, Yuri Benditovich wrote:
> >
> >
> > On Thu, Nov 2, 2023 at 11:33 AM Michael S. Tsirkin  > <mailto:m...@redhat.com>> wrote:
> >
> >     On Thu, Nov 02, 2023 at 11:09:27AM +0200, Yuri Benditovich wrote:
> >  > Probably we mix two different patches in this discussion.
> >  > Focusing on the patch in the e-mail header:
> >  >
> >  > IMO it is not acceptable to fail QEMU run for one feature that we
> > can't make
> >  > active when we silently drop all other features in such a case.
> >
> > If the feature is off by default then it seems more reasonable
> > and silent masking can be seen as a bug.
> > Most virtio features are on by default this is why it's
> > reasonable to mask them.
> >
> >
> > If we are talking about RSS: setting it initially off is the development
> > time decision.
> > When it will be completely stable there is no reason to keep it off by
> > default, so this is more a question of time and of a readiness of
> libvirt.
>
> It is not ok to make "on" the default; that will enable RSS even when
> eBPF steering support is not present and can result in performance
> degradation.
>

Exactly as it is today - with vhost=on the host does not offer RSS
without eBPF.
I do not understand what you call "performance degradation"; can you
describe the scenario?


>
> We will need OnOffAuto instead of a simple boolean value if we are going
> to enable RSS when eBPF steering support is available; "auto" will be
> the default and will enable RSS if and only if eBPF steering support is
> available. "on" will not be default so it's better to validate if RSS is
> available when the user explicitly specified "on" for the "rss" property.
>

Re: [PATCH v6 11/21] virtio-net: Return an error when vhost cannot enable RSS

2023-11-02 Thread Yuri Benditovich

On Thu, Nov 2, 2023 at 1:26 PM Michael S. Tsirkin  wrote:

> On Thu, Nov 02, 2023 at 12:20:39PM +0200, Yuri Benditovich wrote:
> >
> >
> > On Thu, Nov 2, 2023 at 11:33 AM Michael S. Tsirkin 
> wrote:
> >
> > On Thu, Nov 02, 2023 at 11:09:27AM +0200, Yuri Benditovich wrote:
> > > Probably we mix two different patches in this discussion.
> > > Focusing on the patch in the e-mail header:
> > >
> > > IMO it is not acceptable to fail QEMU run for one feature that we
> can't
> > make
> > > active when we silently drop all other features in such a case.
> >
> > If the feature is off by default then it seems more reasonable
> > and silent masking can be seen as a bug.
> > Most virtio features are on by default this is why it's
> > reasonable to mask them.
> >
> >
> >
> > If we are talking about RSS: setting it initially off is the development
> time
> > decision.
> > When it will be completely stable there is no reason to keep it off by
> default,
> > so this is more a question of time and of a readiness of libvirt.
>
> Well when we flip the default we'll need compat machinery for sure ;)
>

Of course, when we flip the default we'll need to keep compatibility with
earlier machine types.
But because, in the long run, it makes sense to make RSS on by
default, I do not think we need _now_ to make QEMU fail to start if the
eBPF program can't be loaded.


>
> --
> MST
>
>

Re: [PATCH v6 11/21] virtio-net: Return an error when vhost cannot enable RSS

2023-11-02 Thread Yuri Benditovich

On Thu, Nov 2, 2023 at 11:33 AM Michael S. Tsirkin  wrote:

> On Thu, Nov 02, 2023 at 11:09:27AM +0200, Yuri Benditovich wrote:
> > Probably we mix two different patches in this discussion.
> > Focusing on the patch in the e-mail header:
> >
> > IMO it is not acceptable to fail QEMU run for one feature that we can't
> make
> > active when we silently drop all other features in such a case.
>
> If the feature is off by default then it seems more reasonable
> and silent masking can be seen as a bug.
> Most virtio features are on by default this is why it's
> reasonable to mask them.
>
>
If we are talking about RSS: setting it initially off is a development-time
decision.
Once it is completely stable there is no reason to keep it off by
default, so this is more a question of time and of the readiness of libvirt.

> --
> MST
>
>

Re: [PATCH v6 11/21] virtio-net: Return an error when vhost cannot enable RSS

2023-11-02 Thread Yuri Benditovich

We are probably mixing two different patches in this discussion.
Focusing on the patch in the e-mail header:

IMO it is not acceptable to make QEMU fail to run because of one feature that
we can't make active when we silently drop all other features in such a case.

On Wed, Nov 1, 2023 at 11:15 AM Akihiko Odaki 
wrote:

> On 2023/11/01 18:09, Michael S. Tsirkin wrote:
> > On Wed, Nov 01, 2023 at 05:35:50PM +0900, Akihiko Odaki wrote:
> >> On 2023/11/01 15:38, Michael S. Tsirkin wrote:
> >>> On Wed, Nov 01, 2023 at 01:50:00PM +0900, Akihiko Odaki wrote:
>  We had another discussion regarding migration for patch "virtio-net:
> Do not
>  clear VIRTIO_NET_F_HASH_REPORT". It does change the runtime behavior
> so we
>  need to take migration into account. I still think the patch does not
>  require a compatibility flag since it only exposes a new feature and
> does
>  not prevent migrating from old QEMU that exposes less features. It
> instead
>  fixes the case where migrating between hosts with different tap
> feature
>  sets.
> >>>
> >>> When in doubt, add a compat flag.
> >>
> >> Personally I'm confident about the migration compatibility with patch
> >> "virtio-net: Do not clear VIRTIO_NET_F_HASH_REPORT". virtio-net already
> does
> >> the same thing when the tap implementation on the destination implements
> >> virtio-net header support while the counterpart of the source does not.
> >
> > Trust me there's been so many times where we were very sure and
> > problems come up later. Just don't enable new functionality for
> > old machine types, problem solved. Why is this hard?
>
> I see. I'll add a compatibility flag for VIRTIO_NET_F_HASH_REPORT
> exposure; it should be quite easy.
>

Re: [PATCH v6 11/21] virtio-net: Return an error when vhost cannot enable RSS

2023-10-30 Thread Yuri Benditovich

On Mon, Oct 30, 2023 at 2:21 PM Akihiko Odaki 
wrote:

> On 2023/10/30 21:14, Yuri Benditovich wrote:
> >
> >
> > On Mon, Oct 30, 2023 at 7:14 AM Akihiko Odaki  > <mailto:akihiko.od...@daynix.com>> wrote:
> >
> > vhost requires eBPF for RSS. When eBPF is not available, virtio-net
> > implicitly disables RSS even if the user explicitly requests it.
> Return
> > an error instead of implicitly disabling RSS if RSS is requested but
> not
> > available.
> >
> >
> > I think that suggesting RSS feature when in fact it is not available is
> > not a good idea, this rather desinforms the guest.
> > Existing behavior (IMHO) makes more sense.
> > We can extend this discussion if needed, of course.
>
> This change is not to advertise RSS when it's not available; it instead
> reports an error to the user.
>
> For example, think of the following command line:
> qemu-system-x86_64 -device virtio-net,rss=on,netdev=n -netdev user,id=n
>
> Before this change, it gives no error and the user will not know RSS is
> not available. With this change it now gives an error as follows:
> qemu-system-x86_64: -device virtio-net,rss=on,netdev=n: Can't load eBPF RSS
>

Does this mean that QEMU will fail to run if RSS is requested on the command
line and eBPF can't be loaded
(for example, if we run the system with a kernel < 5.8)?
I'm not sure this is user-friendly behavior...

Re: [PATCH v6 12/21] virtio-net: Enable software RSS

2023-10-30 Thread Yuri Benditovich

On Mon, Oct 30, 2023 at 7:14 AM Akihiko Odaki 
wrote:

> virtio-net implements software RSS but does not enable it. Enable it
> when RSS is requested, but the eBPF implementation is not available.
> We also check if vhost is in use in such a case since software RSS is
> incompatible with vhost. A warning will be emitted when falling back to
> software RSS since it provides no performance benefit.
>
>
Can you please elaborate on what is wrong, from your point of view, in the
existing implementation?
In general it does (IMO) what you describe.
I'd like to note several things:
- "vhost=off" is in fact the fallback mode (the libvirt default is vhost=on)
- The main goal of software RSS was to provide a reference RSS
implementation for future virtio-net hardware
- The performance benefit of software RSS (as well as of the eBPF
implementation) is the delivery of each packet to the proper virtqueue/CPU,
avoiding packet rescheduling in the guest
- The best thing (IMO) would be to implement hash delivery with vhost=on,
i.e. in eBPF, and as soon as this is possible, rely on the hardware/host
capabilities and stop calculating the hash in the guest driver (as we do
today in Windows)




> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/virtio-net.c | 21 +++--
>  1 file changed, 11 insertions(+), 10 deletions(-)
>
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index 7bb91617d0..1fa020d905 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -1260,10 +1260,12 @@ static bool virtio_net_attach_epbf_rss(VirtIONet
> *n)
>
>  if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
>n->rss_data.indirections_table,
> n->rss_data.key)) {
> +warn_report("Failed to configure eBPF RSS");
>  return false;
>  }
>
>  if (!virtio_net_attach_ebpf_to_backend(n->nic,
> n->ebpf_rss.program_fd)) {
> +warn_report("Failed to attach eBPF to backend");
>  return false;
>  }
>
> @@ -1278,16 +1280,10 @@ static void virtio_net_detach_epbf_rss(VirtIONet
> *n)
>  static void virtio_net_commit_rss_config(VirtIONet *n)
>  {
>  if (n->rss_data.enabled) {
> -n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
> +n->rss_data.enabled_software_rss = n->rss_data.populate_hash ||
> +   !virtio_net_attach_epbf_rss(n);
>  if (n->rss_data.populate_hash) {
>  virtio_net_detach_epbf_rss(n);
> -} else if (!virtio_net_attach_epbf_rss(n)) {
> -if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
> -warn_report("Can't load eBPF RSS for vhost");
> -} else {
> -warn_report("Can't load eBPF RSS - fallback to software
> RSS");
> -n->rss_data.enabled_software_rss = true;
> -}
>  }
>
>  trace_virtio_net_rss_enable(n->rss_data.hash_types,
> @@ -3747,8 +3743,13 @@ static void virtio_net_device_realize(DeviceState
> *dev, Error **errp)
>
>  if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS) &&
>  !virtio_net_load_ebpf(n)) {
> -error_setg(errp, "Can't load eBPF RSS");
> -virtio_net_device_unrealize(dev);
> +if (get_vhost_net(nc->peer)) {
> +error_setg(errp, "Can't load eBPF RSS for vhost");
> +virtio_net_device_unrealize(dev);
> +return;
> +}
> +
> +warn_report_once("Can't load eBPF RSS - fallback to software
> RSS");
>  }
>  }
>
> --
> 2.42.0
>
>

Re: [PATCH v6 11/21] virtio-net: Return an error when vhost cannot enable RSS

2023-10-30 Thread Yuri Benditovich

On Mon, Oct 30, 2023 at 7:14 AM Akihiko Odaki 
wrote:

> vhost requires eBPF for RSS. When eBPF is not available, virtio-net
> implicitly disables RSS even if the user explicitly requests it. Return
> an error instead of implicitly disabling RSS if RSS is requested but not
> available.
>

I think that offering the RSS feature when in fact it is not available is not
a good idea; it rather misinforms the guest.
The existing behavior (IMHO) makes more sense.
We can extend this discussion if needed, of course.


> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/virtio-net.c | 97 ++---
>  1 file changed, 48 insertions(+), 49 deletions(-)
>
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index 5d4afd12b2..7bb91617d0 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -792,9 +792,6 @@ static uint64_t virtio_net_get_features(VirtIODevice
> *vdev, uint64_t features,
>  return features;
>  }
>
> -if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
> -virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
> -}
>  features = vhost_net_get_features(get_vhost_net(nc->peer), features);
>  vdev->backend_features = features;
>
> @@ -3533,6 +3530,50 @@ static bool
> failover_hide_primary_device(DeviceListener *listener,
>  return qatomic_read(&n->failover_primary_hidden);
>  }
>
> +static void virtio_net_device_unrealize(DeviceState *dev)
> +{
> +VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +VirtIONet *n = VIRTIO_NET(dev);
> +int i, max_queue_pairs;
> +
> +if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
> +virtio_net_unload_ebpf(n);
> +}
> +
> +/* This will stop vhost backend if appropriate. */
> +virtio_net_set_status(vdev, 0);
> +
> +g_free(n->netclient_name);
> +n->netclient_name = NULL;
> +g_free(n->netclient_type);
> +n->netclient_type = NULL;
> +
> +g_free(n->mac_table.macs);
> +g_free(n->vlans);
> +
> +if (n->failover) {
> +qobject_unref(n->primary_opts);
> +device_listener_unregister(&n->primary_listener);
> +migration_remove_notifier(&n->migration_state);
> +} else {
> +assert(n->primary_opts == NULL);
> +}
> +
> +max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
> +for (i = 0; i < max_queue_pairs; i++) {
> +virtio_net_del_queue(n, i);
> +}
> +/* delete also control vq */
> +virtio_del_queue(vdev, max_queue_pairs * 2);
> +qemu_announce_timer_del(&n->announce_timer, false);
> +g_free(n->vqs);
> +qemu_del_nic(n->nic);
> +virtio_net_rsc_cleanup(n);
> +g_free(n->rss_data.indirections_table);
> +net_rx_pkt_uninit(n->rx_pkt);
> +virtio_cleanup(vdev);
> +}
> +
>  static void virtio_net_device_realize(DeviceState *dev, Error **errp)
>  {
>  VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> @@ -3704,53 +3745,11 @@ static void virtio_net_device_realize(DeviceState
> *dev, Error **errp)
>
>  net_rx_pkt_init(&n->rx_pkt);
>
> -if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
> -virtio_net_load_ebpf(n);
> -}
> -}
> -
> -static void virtio_net_device_unrealize(DeviceState *dev)
> -{
> -VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> -VirtIONet *n = VIRTIO_NET(dev);
> -int i, max_queue_pairs;
> -
> -if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
> -virtio_net_unload_ebpf(n);
> +if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS) &&
> +!virtio_net_load_ebpf(n)) {
> +error_setg(errp, "Can't load eBPF RSS");
> +virtio_net_device_unrealize(dev);
>  }
> -
> -/* This will stop vhost backend if appropriate. */
> -virtio_net_set_status(vdev, 0);
> -
> -g_free(n->netclient_name);
> -n->netclient_name = NULL;
> -g_free(n->netclient_type);
> -n->netclient_type = NULL;
> -
> -g_free(n->mac_table.macs);
> -g_free(n->vlans);
> -
> -if (n->failover) {
> -qobject_unref(n->primary_opts);
> -device_listener_unregister(&n->primary_listener);
> -migration_remove_notifier(&n->migration_state);
> -} else {
> -assert(n->primary_opts == NULL);
> -}
> -
> -max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
> -for (i = 0; i < max_queue_pairs; i++) {
> -virtio_net_del_queue(n, i);
> -}
> -/* delete also control vq */
> -virtio_del_queue(vdev, max_queue_pairs * 2);
> -qemu_announce_timer_del(&n->announce_timer, false);
> -g_free(n->vqs);
> -qemu_del_nic(n->nic);
> -virtio_net_rsc_cleanup(n);
> -g_free(n->rss_data.indirections_table);
> -net_rx_pkt_uninit(n->rx_pkt);
> -virtio_cleanup(vdev);
>  }
>
>  static void virtio_net_reset(VirtIODevice *vdev)
> --
> 2.42.0
>
>

Re: [PATCH v5 11/21] virtio-net: Return an error when vhost cannot enable RSS

2023-10-29 Thread Yuri Benditovich

On Tue, Oct 17, 2023 at 7:10 AM Akihiko Odaki 
wrote:

> vhost requires eBPF for RSS. Even when eBPF is not available, virtio-net
> reported RSS availability, and raised a warning only after the
> guest requested RSS, and the guest could not know that RSS is not
> available.
>
>
The existing code offers the RSS feature in the vhost case only when
eBPF is loaded:
https://github.com/qemu/qemu/blob/master/hw/net/virtio-net.c#L828
Am I wrong?



> Check RSS availability during device realization and return an error
> if RSS is requested but not available. Assert RSS availability when
> the guest actually requests the feature.
>
> Signed-off-by: Akihiko Odaki 
> ---
>  ebpf/ebpf_rss.h  |   2 +-
>  ebpf/ebpf_rss-stub.c |   4 +-
>  ebpf/ebpf_rss.c  |  68 +-
>  hw/net/virtio-net.c  | 114 +--
>  4 files changed, 82 insertions(+), 106 deletions(-)
>
> diff --git a/ebpf/ebpf_rss.h b/ebpf/ebpf_rss.h
> index bf3f2572c7..1128173572 100644
> --- a/ebpf/ebpf_rss.h
> +++ b/ebpf/ebpf_rss.h
> @@ -36,7 +36,7 @@ bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx);
>
>  bool ebpf_rss_load(struct EBPFRSSContext *ctx);
>
> -bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig
> *config,
> +void ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig
> *config,
>uint16_t *indirections_table, uint8_t
> *toeplitz_key);
>
>  void ebpf_rss_unload(struct EBPFRSSContext *ctx);
> diff --git a/ebpf/ebpf_rss-stub.c b/ebpf/ebpf_rss-stub.c
> index e71e229190..525b358597 100644
> --- a/ebpf/ebpf_rss-stub.c
> +++ b/ebpf/ebpf_rss-stub.c
> @@ -28,10 +28,10 @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx)
>  return false;
>  }
>
> -bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig
> *config,
> +void ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig
> *config,
>uint16_t *indirections_table, uint8_t *toeplitz_key)
>  {
> -return false;
> +g_assert_not_reached();
>  }
>
>  void ebpf_rss_unload(struct EBPFRSSContext *ctx)
> diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c
> index cee658c158..6cdf82d059 100644
> --- a/ebpf/ebpf_rss.c
> +++ b/ebpf/ebpf_rss.c
> @@ -74,42 +74,32 @@ error:
>  return false;
>  }
>
> -static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx,
> +static void ebpf_rss_set_config(struct EBPFRSSContext *ctx,
>  struct EBPFRSSConfig *config)
>  {
>  uint32_t map_key = 0;
>
> -if (!ebpf_rss_is_loaded(ctx)) {
> -return false;
> -}
> -if (bpf_map_update_elem(ctx->map_configuration,
> -&map_key, config, 0) < 0) {
> -return false;
> -}
> -return true;
> +assert(ebpf_rss_is_loaded(ctx));
> +assert(!bpf_map_update_elem(ctx->map_configuration, &map_key, config,
> 0));
>  }
>
> -static bool ebpf_rss_set_indirections_table(struct EBPFRSSContext *ctx,
> +static void ebpf_rss_set_indirections_table(struct EBPFRSSContext *ctx,
>  uint16_t *indirections_table,
>  size_t len)
>  {
>  uint32_t i = 0;
>
> -if (!ebpf_rss_is_loaded(ctx) || indirections_table == NULL ||
> -   len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
> -return false;
> -}
> +assert(ebpf_rss_is_loaded(ctx));
> +assert(indirections_table);
> +assert(len <= VIRTIO_NET_RSS_MAX_TABLE_LEN);
>
>  for (; i < len; ++i) {
> -if (bpf_map_update_elem(ctx->map_indirections_table, &i,
> -indirections_table + i, 0) < 0) {
> -return false;
> -}
> +assert(!bpf_map_update_elem(ctx->map_indirections_table, &i,
> +indirections_table + i, 0));
>  }
> -return true;
>  }
>
> -static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
> +static void ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
>   uint8_t *toeplitz_key)
>  {
>  uint32_t map_key = 0;
> @@ -117,41 +107,29 @@ static bool ebpf_rss_set_toepliz_key(struct
> EBPFRSSContext *ctx,
>  /* prepare toeplitz key */
>  uint8_t toe[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {};
>
> -if (!ebpf_rss_is_loaded(ctx) || toeplitz_key == NULL) {
> -return false;
> -}
> +assert(ebpf_rss_is_loaded(ctx));
> +assert(toeplitz_key);
> +
>  memcpy(toe, toeplitz_key, VIRTIO_NET_RSS_MAX_KEY_SIZE);
>  *(uint32_t *)toe = ntohl(*(uint32_t *)toe);
>
> -if (bpf_map_update_elem(ctx->map_toeplitz_key, &map_key, toe,
> -0) < 0) {
> -return false;
> -}
> -return true;
> +assert(!bpf_map_update_elem(ctx->map_toeplitz_key, &map_key, toe, 0));
>  }
>
> -bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig
> *config,
> +void ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig
> *config,
>

Re: [PATCH v5 15/21] virtio-net: Do not clear VIRTIO_NET_F_HASH_REPORT

2023-10-29 Thread Yuri Benditovich

This patch allows the VIRTIO_NET_F_HASH_REPORT feature on an adapter whose
backend does not have a virtio header and does not have the offload features
that depend on it.
Migration between such different systems is very problematic even if it
seems successful. Such setups are not performance-oriented, and supporting
hash delivery for them is (IMHO) redundant: it just requires more testing
and does not bring any advantage.

On Fri, Oct 27, 2023 at 11:07 AM Akihiko Odaki 
wrote:

> On 2023/10/27 16:14, Jason Wang wrote:
> > On Tue, Oct 17, 2023 at 12:14 PM Akihiko Odaki 
> wrote:
> >>
> >> virtio-net can report hash values even if the peer does not have a
> >> virtio-net header.
> >
> > Do we need a compat flag for this?
>
> I don't think so. This change actually fixes the migration from a system
> with tap devices that support virtio-net headers to a system with tap
> devices that do not support virtio-net headers. Such a compatibility
> flag will revert the fix.
>
> Regards,
> Akihiko Odaki
>

Re: [PATCH v2 0/4] virtio-net: add USO feature (UDP segmentation offload)

2023-08-09 Thread Yuri Benditovich

ping

On Tue, Aug 1, 2023 at 1:32 AM Yuri Benditovich 
wrote:

> Starting from 6.2 the kernel supports UDP segmentation offload; it
> uses GSO_UDP_L4 to mark packets with a UDP segmentation request.
>
> v1->v2:
>  Enable USO features by default starting from 8.1
>  Move command-line parameters to the last patch
>
> Andrew Melnychenko (2):
>   tap: Add USO support to tap device.
>   virtio-net: Add USO flags to vhost support.
>
> Yuri Benditovich (2):
>   tap: Add check for USO features
>   virtio-net: Add support for USO features
>
>  hw/core/machine.c|  4 
>  hw/net/e1000e_core.c |  2 +-
>  hw/net/igb_core.c|  2 +-
>  hw/net/vhost_net.c   |  3 +++
>  hw/net/virtio-net.c  | 35 ---
>  hw/net/vmxnet3.c |  2 ++
>  include/net/net.h|  7 +--
>  net/net.c| 13 +++--
>  net/tap-bsd.c|  7 ++-
>  net/tap-linux.c  | 27 ---
>  net/tap-linux.h  |  2 ++
>  net/tap-solaris.c|  7 ++-
>  net/tap-stub.c   |  7 ++-
>  net/tap-win32.c  |  2 +-
>  net/tap.c| 18 +++---
>  net/tap_int.h|  4 +++-
>  net/vhost-vdpa.c |  3 +++
>  17 files changed, 125 insertions(+), 20 deletions(-)
>
> --
> 2.34.3
>
>

[PATCH v2 0/4] virtio-net: add USO feature (UDP segmentation offload)

2023-07-31 Thread Yuri Benditovich

Starting from 6.2 the kernel supports UDP segmentation offload; it
uses GSO_UDP_L4 to mark packets with a UDP segmentation request.

v1->v2:
 Enable USO features by default starting from 8.1
 Move command-line parameters to the last patch

Andrew Melnychenko (2):
  tap: Add USO support to tap device.
  virtio-net: Add USO flags to vhost support.

Yuri Benditovich (2):
  tap: Add check for USO features
  virtio-net: Add support for USO features

 hw/core/machine.c|  4 
 hw/net/e1000e_core.c |  2 +-
 hw/net/igb_core.c|  2 +-
 hw/net/vhost_net.c   |  3 +++
 hw/net/virtio-net.c  | 35 ---
 hw/net/vmxnet3.c |  2 ++
 include/net/net.h|  7 +--
 net/net.c| 13 +++--
 net/tap-bsd.c|  7 ++-
 net/tap-linux.c  | 27 ---
 net/tap-linux.h  |  2 ++
 net/tap-solaris.c|  7 ++-
 net/tap-stub.c   |  7 ++-
 net/tap-win32.c  |  2 +-
 net/tap.c| 18 +++---
 net/tap_int.h|  4 +++-
 net/vhost-vdpa.c |  3 +++
 17 files changed, 125 insertions(+), 20 deletions(-)

-- 
2.34.3

[PATCH v2 4/4] virtio-net: Add support for USO features

2023-07-31 Thread Yuri Benditovich

USO features of the virtio-net device depend on the kernel's ability
to support them. For backward compatibility, the features are
disabled by default on machine types 8.0 and earlier.

Signed-off-by: Yuri Benditovich 
Signed-off-by: Andrew Melnychenko 
---
 hw/core/machine.c   |  4 
 hw/net/virtio-net.c | 31 +--
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index f0d35c6401..a725e76738 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -38,10 +38,14 @@
 #include "exec/confidential-guest-support.h"
 #include "hw/virtio/virtio.h"
 #include "hw/virtio/virtio-pci.h"
+#include "hw/virtio/virtio-net.h"
 
 GlobalProperty hw_compat_8_0[] = {
 { "migration", "multifd-flush-after-each-section", "on"},
 { TYPE_PCI_DEVICE, "x-pcie-ari-nextfn-1", "on" },
+{ TYPE_VIRTIO_NET, "host_uso", "off"},
+{ TYPE_VIRTIO_NET, "guest_uso4", "off"},
+{ TYPE_VIRTIO_NET, "guest_uso6", "off"},
 };
 const size_t hw_compat_8_0_len = G_N_ELEMENTS(hw_compat_8_0);
 
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index d2311e7d6e..bd0ead94fe 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -659,6 +659,15 @@ static int peer_has_ufo(VirtIONet *n)
 return n->has_ufo;
 }
 
+static int peer_has_uso(VirtIONet *n)
+{
+if (!peer_has_vnet_hdr(n)) {
+return 0;
+}
+
+return qemu_has_uso(qemu_get_queue(n->nic)->peer);
+}
+
 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
int version_1, int hash_report)
 {
@@ -796,6 +805,10 @@ static uint64_t virtio_net_get_features(VirtIODevice 
*vdev, uint64_t features,
 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
 
+virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
+virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
+virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
+
 virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
 }
 
@@ -804,6 +817,12 @@ static uint64_t virtio_net_get_features(VirtIODevice 
*vdev, uint64_t features,
 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
 }
 
+if (!peer_has_uso(n)) {
+virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
+virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
+virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
+}
+
 if (!get_vhost_net(nc->peer)) {
 return features;
 }
@@ -864,14 +883,16 @@ static void virtio_net_apply_guest_offloads(VirtIONet *n)
 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)));
 }
 
-static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
+static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
 {
 static const uint64_t guest_offloads_mask =
 (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
 (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
 (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
 (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
-(1ULL << VIRTIO_NET_F_GUEST_UFO);
+(1ULL << VIRTIO_NET_F_GUEST_UFO)  |
+(1ULL << VIRTIO_NET_F_GUEST_USO4) |
+(1ULL << VIRTIO_NET_F_GUEST_USO6);
 
 return guest_offloads_mask & features;
 }
@@ -3924,6 +3945,12 @@ static Property virtio_net_properties[] = {
 DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
 DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
 DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
+DEFINE_PROP_BIT64("guest_uso4", VirtIONet, host_features,
+  VIRTIO_NET_F_GUEST_USO4, true),
+DEFINE_PROP_BIT64("guest_uso6", VirtIONet, host_features,
+  VIRTIO_NET_F_GUEST_USO6, true),
+DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
+  VIRTIO_NET_F_HOST_USO, true),
 DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
2.34.3

[PATCH v2 1/4] tap: Add USO support to tap device.

2023-07-31 Thread Yuri Benditovich

From: Andrew Melnychenko 

Passing additional parameters (USOv4 and USOv6 offloads) when
setting TAP offloads

Signed-off-by: Yuri Benditovich 
Signed-off-by: Andrew Melnychenko 
---
 hw/net/e1000e_core.c |  2 +-
 hw/net/igb_core.c|  2 +-
 hw/net/virtio-net.c  |  4 +++-
 hw/net/vmxnet3.c |  2 ++
 include/net/net.h|  4 ++--
 net/net.c|  4 ++--
 net/tap-bsd.c|  2 +-
 net/tap-linux.c  | 15 ---
 net/tap-linux.h  |  2 ++
 net/tap-solaris.c|  2 +-
 net/tap-stub.c   |  2 +-
 net/tap-win32.c  |  2 +-
 net/tap.c|  6 +++---
 net/tap_int.h|  3 ++-
 14 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
index f8aeafa16b..d4055956ad 100644
--- a/hw/net/e1000e_core.c
+++ b/hw/net/e1000e_core.c
@@ -2852,7 +2852,7 @@ e1000e_update_rx_offloads(E1000ECore *core)
 
 if (core->has_vnet) {
 qemu_set_offload(qemu_get_queue(core->owner_nic)->peer,
- cso_state, 0, 0, 0, 0);
+ cso_state, 0, 0, 0, 0, 0, 0);
 }
 }
 
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 8b6b75c522..389eef1549 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -2753,7 +2753,7 @@ igb_update_rx_offloads(IGBCore *core)
 
 if (core->has_vnet) {
 qemu_set_offload(qemu_get_queue(core->owner_nic)->peer,
- cso_state, 0, 0, 0, 0);
+ cso_state, 0, 0, 0, 0, 0, 0);
 }
 }
 
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 7102ec4817..d2311e7d6e 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -859,7 +859,9 @@ static void virtio_net_apply_guest_offloads(VirtIONet *n)
 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
-!!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
+!!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
+!!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
+!!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)));
 }
 
 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 5dfacb1098..886adae42b 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -1341,6 +1341,8 @@ static void vmxnet3_update_features(VMXNET3State *s)
  s->lro_supported,
  s->lro_supported,
  0,
+ 0,
+ 0,
  0);
 }
 }
diff --git a/include/net/net.h b/include/net/net.h
index 1448d00afb..b5ccfbbffb 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -58,7 +58,7 @@ typedef bool (HasVnetHdr)(NetClientState *);
 typedef bool (HasVnetHdrLen)(NetClientState *, int);
 typedef bool (GetUsingVnetHdr)(NetClientState *);
 typedef void (UsingVnetHdr)(NetClientState *, bool);
-typedef void (SetOffload)(NetClientState *, int, int, int, int, int);
+typedef void (SetOffload)(NetClientState *, int, int, int, int, int, int, int);
 typedef int (GetVnetHdrLen)(NetClientState *);
 typedef void (SetVnetHdrLen)(NetClientState *, int);
 typedef int (SetVnetLE)(NetClientState *, bool);
@@ -192,7 +192,7 @@ bool qemu_has_vnet_hdr_len(NetClientState *nc, int len);
 bool qemu_get_using_vnet_hdr(NetClientState *nc);
 void qemu_using_vnet_hdr(NetClientState *nc, bool enable);
 void qemu_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
-  int ecn, int ufo);
+  int ecn, int ufo, int uso4, int uso6);
 int qemu_get_vnet_hdr_len(NetClientState *nc);
 void qemu_set_vnet_hdr_len(NetClientState *nc, int len);
 int qemu_set_vnet_le(NetClientState *nc, bool is_le);
diff --git a/net/net.c b/net/net.c
index 6492ad530e..543e6dec43 100644
--- a/net/net.c
+++ b/net/net.c
@@ -532,13 +532,13 @@ void qemu_using_vnet_hdr(NetClientState *nc, bool enable)
 }
 
 void qemu_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
-  int ecn, int ufo)
+  int ecn, int ufo, int uso4, int uso6)
 {
 if (!nc || !nc->info->set_offload) {
 return;
 }
 
-nc->info->set_offload(nc, csum, tso4, tso6, ecn, ufo);
+nc->info->set_offload(nc, csum, tso4, tso6, ecn, ufo, uso4, uso6);
 }
 
 int qemu_get_vnet_hdr_len(NetClientState *nc)
diff --git a/net/tap-bsd.c b/net/tap-bsd.c
index 4c98fdd337..abd16a2ad2 100644
--- a/net/tap-bsd.c
+++ b/net/tap-bsd.c
@@ -232,7 +232,7 @@ int tap_fd_set_vnet_be(int fd, int is_be)
 }
 
 void tap_fd_set_offload(int fd, int csum, int tso4,
-int tso6, int

[PATCH v2 3/4] virtio-net: Add USO flags to vhost support.

2023-07-31 Thread Yuri Benditovich

From: Andrew Melnychenko 

New features are subject to check with vhost-user and vdpa.

Signed-off-by: Yuri Benditovich 
Signed-off-by: Andrew Melnychenko 
---
 hw/net/vhost_net.c | 3 +++
 net/vhost-vdpa.c   | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 6b958d6363..57427a3997 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -78,6 +78,9 @@ static const int user_feature_bits[] = {
 VIRTIO_F_RING_RESET,
 VIRTIO_NET_F_RSS,
 VIRTIO_NET_F_HASH_REPORT,
+VIRTIO_NET_F_GUEST_USO4,
+VIRTIO_NET_F_GUEST_USO6,
+VIRTIO_NET_F_HOST_USO,
 
 /* This bit implies RARP isn't sent by QEMU out of band */
 VIRTIO_NET_F_GUEST_ANNOUNCE,
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 9795306742..1dca37aae2 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -75,11 +75,14 @@ const int vdpa_feature_bits[] = {
 VIRTIO_NET_F_GUEST_TSO4,
 VIRTIO_NET_F_GUEST_TSO6,
 VIRTIO_NET_F_GUEST_UFO,
+VIRTIO_NET_F_GUEST_USO4,
+VIRTIO_NET_F_GUEST_USO6,
 VIRTIO_NET_F_HASH_REPORT,
 VIRTIO_NET_F_HOST_ECN,
 VIRTIO_NET_F_HOST_TSO4,
 VIRTIO_NET_F_HOST_TSO6,
 VIRTIO_NET_F_HOST_UFO,
+VIRTIO_NET_F_HOST_USO,
 VIRTIO_NET_F_MQ,
 VIRTIO_NET_F_MRG_RXBUF,
 VIRTIO_NET_F_MTU,
-- 
2.34.3

[PATCH v2 2/4] tap: Add check for USO features

2023-07-31 Thread Yuri Benditovich

Tap indicates support for USO features according to
capabilities of current kernel module.

Signed-off-by: Yuri Benditovich 
Signed-off-by: Andrew Melnychenko 
---
 include/net/net.h |  3 +++
 net/net.c |  9 +
 net/tap-bsd.c |  5 +
 net/tap-linux.c   | 12 
 net/tap-solaris.c |  5 +
 net/tap-stub.c|  5 +
 net/tap.c | 12 
 net/tap_int.h |  1 +
 8 files changed, 52 insertions(+)

diff --git a/include/net/net.h b/include/net/net.h
index b5ccfbbffb..330d285930 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -54,6 +54,7 @@ typedef void (LinkStatusChanged)(NetClientState *);
 typedef void (NetClientDestructor)(NetClientState *);
 typedef RxFilterInfo *(QueryRxFilter)(NetClientState *);
 typedef bool (HasUfo)(NetClientState *);
+typedef bool (HasUso)(NetClientState *);
 typedef bool (HasVnetHdr)(NetClientState *);
 typedef bool (HasVnetHdrLen)(NetClientState *, int);
 typedef bool (GetUsingVnetHdr)(NetClientState *);
@@ -84,6 +85,7 @@ typedef struct NetClientInfo {
 QueryRxFilter *query_rx_filter;
 NetPoll *poll;
 HasUfo *has_ufo;
+HasUso *has_uso;
 HasVnetHdr *has_vnet_hdr;
 HasVnetHdrLen *has_vnet_hdr_len;
 GetUsingVnetHdr *get_using_vnet_hdr;
@@ -187,6 +189,7 @@ void qemu_set_info_str(NetClientState *nc,
const char *fmt, ...) G_GNUC_PRINTF(2, 3);
 void qemu_format_nic_info_str(NetClientState *nc, uint8_t macaddr[6]);
 bool qemu_has_ufo(NetClientState *nc);
+bool qemu_has_uso(NetClientState *nc);
 bool qemu_has_vnet_hdr(NetClientState *nc);
 bool qemu_has_vnet_hdr_len(NetClientState *nc, int len);
 bool qemu_get_using_vnet_hdr(NetClientState *nc);
diff --git a/net/net.c b/net/net.c
index 543e6dec43..b110e61f66 100644
--- a/net/net.c
+++ b/net/net.c
@@ -495,6 +495,15 @@ bool qemu_has_ufo(NetClientState *nc)
 return nc->info->has_ufo(nc);
 }
 
+bool qemu_has_uso(NetClientState *nc)
+{
+if (!nc || !nc->info->has_uso) {
+return false;
+}
+
+return nc->info->has_uso(nc);
+}
+
 bool qemu_has_vnet_hdr(NetClientState *nc)
 {
 if (!nc || !nc->info->has_vnet_hdr) {
diff --git a/net/tap-bsd.c b/net/tap-bsd.c
index abd16a2ad2..274ea7bd2c 100644
--- a/net/tap-bsd.c
+++ b/net/tap-bsd.c
@@ -212,6 +212,11 @@ int tap_probe_has_ufo(int fd)
 return 0;
 }
 
+int tap_probe_has_uso(int fd)
+{
+return 0;
+}
+
 int tap_probe_vnet_hdr_len(int fd, int len)
 {
 return 0;
diff --git a/net/tap-linux.c b/net/tap-linux.c
index 30fcca1bc2..c7e514ecb0 100644
--- a/net/tap-linux.c
+++ b/net/tap-linux.c
@@ -173,6 +173,18 @@ int tap_probe_has_ufo(int fd)
 return 1;
 }
 
+int tap_probe_has_uso(int fd)
+{
+unsigned offload;
+
+offload = TUN_F_CSUM | TUN_F_USO4 | TUN_F_USO6;
+
+if (ioctl(fd, TUNSETOFFLOAD, offload) < 0) {
+return 0;
+}
+return 1;
+}
+
 /* Verify that we can assign given length */
 int tap_probe_vnet_hdr_len(int fd, int len)
 {
diff --git a/net/tap-solaris.c b/net/tap-solaris.c
index a617a10e5c..08b13af512 100644
--- a/net/tap-solaris.c
+++ b/net/tap-solaris.c
@@ -216,6 +216,11 @@ int tap_probe_has_ufo(int fd)
 return 0;
 }
 
+int tap_probe_has_uso(int fd)
+{
+return 0;
+}
+
 int tap_probe_vnet_hdr_len(int fd, int len)
 {
 return 0;
diff --git a/net/tap-stub.c b/net/tap-stub.c
index ac8dfc03b4..4b24f61e3a 100644
--- a/net/tap-stub.c
+++ b/net/tap-stub.c
@@ -47,6 +47,11 @@ int tap_probe_has_ufo(int fd)
 return 0;
 }
 
+int tap_probe_has_uso(int fd)
+{
+return 0;
+}
+
 int tap_probe_vnet_hdr_len(int fd, int len)
 {
 return 0;
diff --git a/net/tap.c b/net/tap.c
index 14ea4ef26f..bcea8d03f9 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -57,6 +57,7 @@ typedef struct TAPState {
 bool write_poll;
 bool using_vnet_hdr;
 bool has_ufo;
+bool has_uso;
 bool enabled;
 VHostNetState *vhost_net;
 unsigned host_vnet_hdr_len;
@@ -237,6 +238,15 @@ static bool tap_has_ufo(NetClientState *nc)
 return s->has_ufo;
 }
 
+static bool tap_has_uso(NetClientState *nc)
+{
+TAPState *s = DO_UPCAST(TAPState, nc, nc);
+
+assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
+
+return s->has_uso;
+}
+
 static bool tap_has_vnet_hdr(NetClientState *nc)
 {
 TAPState *s = DO_UPCAST(TAPState, nc, nc);
@@ -384,6 +394,7 @@ static NetClientInfo net_tap_info = {
 .poll = tap_poll,
 .cleanup = tap_cleanup,
 .has_ufo = tap_has_ufo,
+.has_uso = tap_has_uso,
 .has_vnet_hdr = tap_has_vnet_hdr,
 .has_vnet_hdr_len = tap_has_vnet_hdr_len,
 .get_using_vnet_hdr = tap_get_using_vnet_hdr,
@@ -413,6 +424,7 @@ static TAPState *net_tap_fd_init(NetClientState *peer,
 s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
 s->using_vnet_hdr = false;
 s->has_ufo = tap_probe_has_ufo(s->fd);
+s->has_uso = tap_probe_has_uso(s->fd);
 s->enabled = true;
 tap_set_offload(&s->nc, 0, 0, 0, 0, 0, 0, 0);

[PATCH] pci: do not respond config requests after PCI device eject

2023-07-28 Thread Yuri Benditovich

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2224964

During migration with VF failover, a Windows guest and ACPI hot
unplug, we must not respond to config requests once the device has
been ejected; otherwise the guest immediately detects the device
again and brings up its driver. As a result, many network VFs are
left stuck on the guest PCI bus after the migration.

Signed-off-by: Yuri Benditovich 
---
 hw/pci/pci_host.c | 15 +--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c
index 7af8afdcbe..a18aa0a8d4 100644
--- a/hw/pci/pci_host.c
+++ b/hw/pci/pci_host.c
@@ -62,6 +62,17 @@ static void pci_adjust_config_limit(PCIBus *bus, uint32_t 
*limit)
 }
 }
 
+static bool is_pci_dev_ejected(PCIDevice *pci_dev)
+{
+/*
+ * device unplug was requested and the guest acked it,
+ * so we stop responding config accesses even if the
+ * device is not deleted (failover flow)
+ */
+return pci_dev && pci_dev->partially_hotplugged &&
+   !pci_dev->qdev.pending_deleted_event;
+}
+
 void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
   uint32_t limit, uint32_t val, uint32_t len)
 {
@@ -75,7 +86,7 @@ void pci_host_config_write_common(PCIDevice *pci_dev, 
uint32_t addr,
  * allowing direct removal of unexposed functions.
  */
 if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) ||
-!pci_dev->has_power) {
+!pci_dev->has_power || is_pci_dev_ejected(pci_dev)) {
 return;
 }
 
@@ -100,7 +111,7 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, 
uint32_t addr,
  * allowing direct removal of unexposed functions.
  */
 if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) ||
-!pci_dev->has_power) {
+!pci_dev->has_power || is_pci_dev_ejected(pci_dev)) {
 return ~0x0;
 }
 
-- 
2.34.3

Re: [PATCH 3/4] virtio-net: added USO support

2023-07-20 Thread Yuri Benditovich

On Thu, Jul 20, 2023 at 3:37 AM Akihiko Odaki 
wrote:

> On 2023/07/20 0:21, Yuri Benditovich wrote:
> > virtio-net can suggest USO features TX, RX v4 and RX v6,
> > depending on kernel TUN ability to support them. These
> > features require explicit enable in command-line.
>
> Shouldn't we enable these by default as the other offload features are?
>

My suggestion is to add these features as disabled by default and
re-evaluate the possibility of enabling them later.
If we enable them by default, we'll also need to disable them by default
in previous generations of machine types.


> >
> > Signed-off-by: Yuri Benditovich 
> > ---
> >   hw/net/virtio-net.c | 16 ++--
> >   1 file changed, 14 insertions(+), 2 deletions(-)
> >
> > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> > index d2311e7d6e..e76cad923b 100644
> > --- a/hw/net/virtio-net.c
> > +++ b/hw/net/virtio-net.c
> > @@ -796,6 +796,10 @@ static uint64_t
> virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
> >   virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
> >   virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
> >
> > +virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
> > +virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
> > +virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
> > +
> >   virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
> >   }
> >
> > @@ -864,14 +868,16 @@ static void
> virtio_net_apply_guest_offloads(VirtIONet *n)
> >   !!(n->curr_guest_offloads & (1ULL <<
> VIRTIO_NET_F_GUEST_USO6)));
> >   }
> >
> > -static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
> > +static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
> >   {
> >   static const uint64_t guest_offloads_mask =
> >   (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
> >   (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
> >   (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
> >   (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
> > -(1ULL << VIRTIO_NET_F_GUEST_UFO);
> > +(1ULL << VIRTIO_NET_F_GUEST_UFO)  |
> > +(1ULL << VIRTIO_NET_F_GUEST_USO4) |
> > +(1ULL << VIRTIO_NET_F_GUEST_USO6);
> >
> >   return guest_offloads_mask & features;
> >   }
> > @@ -3924,6 +3930,12 @@ static Property virtio_net_properties[] = {
> >   DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed,
> SPEED_UNKNOWN),
> >   DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
> >   DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
> > +DEFINE_PROP_BIT64("guest_uso4", VirtIONet, host_features,
> > +  VIRTIO_NET_F_GUEST_USO4, false),
> > +DEFINE_PROP_BIT64("guest_uso6", VirtIONet, host_features,
> > +  VIRTIO_NET_F_GUEST_USO6, false),
> > +DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
> > +  VIRTIO_NET_F_HOST_USO, false),
> >   DEFINE_PROP_END_OF_LIST(),
> >   };
> >
>

Re: [PATCH 1/4] tap: Added USO support to tap device.

2023-07-20 Thread Yuri Benditovich

On Thu, Jul 20, 2023 at 3:31 AM Akihiko Odaki 
wrote:

> Nitpicking: the subject of this patch is somewhat unconventional. What
> about: "tap: Add USO support to tap device"?
>
Will take it into account in v2.


> On 2023/07/20 0:21, Yuri Benditovich wrote:
> > From: Andrew Melnychenko 
> >
> > Passing additional parameters (USOv4 and USOv6 offloads) when
> > setting TAP offloads
> >
> > Signed-off-by: Yuri Benditovich 
> > Signed-off-by: Andrew Melnychenko 
> > ---
> >   hw/net/e1000e_core.c |  2 +-
> >   hw/net/igb_core.c|  2 +-
> >   hw/net/virtio-net.c  |  4 +++-
> >   hw/net/vmxnet3.c |  2 ++
> >   include/net/net.h|  4 ++--
> >   net/net.c|  4 ++--
> >   net/tap-bsd.c|  2 +-
> >   net/tap-linux.c  | 15 ---
> >   net/tap-linux.h  |  2 ++
> >   net/tap-solaris.c|  2 +-
> >   net/tap-stub.c   |  2 +-
> >   net/tap-win32.c  |  2 +-
> >   net/tap.c|  6 +++---
> >   net/tap_int.h|  3 ++-
> >   14 files changed, 34 insertions(+), 18 deletions(-)
> >
> > diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
> > index f8aeafa16b..d4055956ad 100644
> > --- a/hw/net/e1000e_core.c
> > +++ b/hw/net/e1000e_core.c
> > @@ -2852,7 +2852,7 @@ e1000e_update_rx_offloads(E1000ECore *core)
> >
> >   if (core->has_vnet) {
> >   qemu_set_offload(qemu_get_queue(core->owner_nic)->peer,
> > - cso_state, 0, 0, 0, 0);
> > + cso_state, 0, 0, 0, 0, 0, 0);
> >   }
> >   }
> >
> > diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
> > index 8b6b75c522..389eef1549 100644
> > --- a/hw/net/igb_core.c
> > +++ b/hw/net/igb_core.c
> > @@ -2753,7 +2753,7 @@ igb_update_rx_offloads(IGBCore *core)
> >
> >   if (core->has_vnet) {
> >   qemu_set_offload(qemu_get_queue(core->owner_nic)->peer,
> > - cso_state, 0, 0, 0, 0);
> > + cso_state, 0, 0, 0, 0, 0, 0);
> >   }
> >   }
> >
> > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> > index 7102ec4817..d2311e7d6e 100644
> > --- a/hw/net/virtio-net.c
> > +++ b/hw/net/virtio-net.c
> > @@ -859,7 +859,9 @@ static void
> virtio_net_apply_guest_offloads(VirtIONet *n)
> >   !!(n->curr_guest_offloads & (1ULL <<
> VIRTIO_NET_F_GUEST_TSO4)),
> >   !!(n->curr_guest_offloads & (1ULL <<
> VIRTIO_NET_F_GUEST_TSO6)),
> >   !!(n->curr_guest_offloads & (1ULL <<
> VIRTIO_NET_F_GUEST_ECN)),
> > -!!(n->curr_guest_offloads & (1ULL <<
> VIRTIO_NET_F_GUEST_UFO)));
> > +!!(n->curr_guest_offloads & (1ULL <<
> VIRTIO_NET_F_GUEST_UFO)),
> > +!!(n->curr_guest_offloads & (1ULL <<
> VIRTIO_NET_F_GUEST_USO4)),
> > +!!(n->curr_guest_offloads & (1ULL <<
> VIRTIO_NET_F_GUEST_USO6)));
> >   }
> >
> >   static uint64_t virtio_net_guest_offloads_by_features(uint32_t
> features)
> > diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
> > index 5dfacb1098..886adae42b 100644
> > --- a/hw/net/vmxnet3.c
> > +++ b/hw/net/vmxnet3.c
> > @@ -1341,6 +1341,8 @@ static void vmxnet3_update_features(VMXNET3State
> *s)
> >s->lro_supported,
> >s->lro_supported,
> >0,
> > + 0,
> > + 0,
> >0);
> >   }
> >   }
> > diff --git a/include/net/net.h b/include/net/net.h
> > index 1448d00afb..b5ccfbbffb 100644
> > --- a/include/net/net.h
> > +++ b/include/net/net.h
> > @@ -58,7 +58,7 @@ typedef bool (HasVnetHdr)(NetClientState *);
> >   typedef bool (HasVnetHdrLen)(NetClientState *, int);
> >   typedef bool (GetUsingVnetHdr)(NetClientState *);
> >   typedef void (UsingVnetHdr)(NetClientState *, bool);
> > -typedef void (SetOffload)(NetClientState *, int, int, int, int, int);
> > +typedef void (SetOffload)(NetClientState *, int, int, int, int, int,
> int, int);
> >   typedef int (GetVnetHdrLen)(NetClientState *);
> >   typedef void (SetVnetHdrLen)(NetClientState *, int);
> >   typedef int (SetVnetLE)(NetClientState *, bool);
> > @@ -192,7 +192,7 @@ bool qemu_has_vnet_hdr_len(NetClientState *nc, int
> len);
> >   bool qemu_get_using_vnet_hdr(NetClientSt

[PATCH 2/4] virtio-net: Added USO flags to vhost support.

2023-07-19 Thread Yuri Benditovich

From: Andrew Melnychenko 

New features are subject to check with vhost-user and vdpa.

Signed-off-by: Yuri Benditovich 
Signed-off-by: Andrew Melnychenko 
---
 hw/net/vhost_net.c | 3 +++
 net/vhost-vdpa.c   | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 6b958d6363..57427a3997 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -78,6 +78,9 @@ static const int user_feature_bits[] = {
 VIRTIO_F_RING_RESET,
 VIRTIO_NET_F_RSS,
 VIRTIO_NET_F_HASH_REPORT,
+VIRTIO_NET_F_GUEST_USO4,
+VIRTIO_NET_F_GUEST_USO6,
+VIRTIO_NET_F_HOST_USO,
 
 /* This bit implies RARP isn't sent by QEMU out of band */
 VIRTIO_NET_F_GUEST_ANNOUNCE,
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 9795306742..1dca37aae2 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -75,11 +75,14 @@ const int vdpa_feature_bits[] = {
 VIRTIO_NET_F_GUEST_TSO4,
 VIRTIO_NET_F_GUEST_TSO6,
 VIRTIO_NET_F_GUEST_UFO,
+VIRTIO_NET_F_GUEST_USO4,
+VIRTIO_NET_F_GUEST_USO6,
 VIRTIO_NET_F_HASH_REPORT,
 VIRTIO_NET_F_HOST_ECN,
 VIRTIO_NET_F_HOST_TSO4,
 VIRTIO_NET_F_HOST_TSO6,
 VIRTIO_NET_F_HOST_UFO,
+VIRTIO_NET_F_HOST_USO,
 VIRTIO_NET_F_MQ,
 VIRTIO_NET_F_MRG_RXBUF,
 VIRTIO_NET_F_MTU,
-- 
2.34.3

[PATCH 1/4] tap: Added USO support to tap device.

2023-07-19 Thread Yuri Benditovich

From: Andrew Melnychenko 

Passing additional parameters (USOv4 and USOv6 offloads) when
setting TAP offloads

Signed-off-by: Yuri Benditovich 
Signed-off-by: Andrew Melnychenko 
---
 hw/net/e1000e_core.c |  2 +-
 hw/net/igb_core.c|  2 +-
 hw/net/virtio-net.c  |  4 +++-
 hw/net/vmxnet3.c |  2 ++
 include/net/net.h|  4 ++--
 net/net.c|  4 ++--
 net/tap-bsd.c|  2 +-
 net/tap-linux.c  | 15 ---
 net/tap-linux.h  |  2 ++
 net/tap-solaris.c|  2 +-
 net/tap-stub.c   |  2 +-
 net/tap-win32.c  |  2 +-
 net/tap.c|  6 +++---
 net/tap_int.h|  3 ++-
 14 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
index f8aeafa16b..d4055956ad 100644
--- a/hw/net/e1000e_core.c
+++ b/hw/net/e1000e_core.c
@@ -2852,7 +2852,7 @@ e1000e_update_rx_offloads(E1000ECore *core)
 
 if (core->has_vnet) {
 qemu_set_offload(qemu_get_queue(core->owner_nic)->peer,
- cso_state, 0, 0, 0, 0);
+ cso_state, 0, 0, 0, 0, 0, 0);
 }
 }
 
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 8b6b75c522..389eef1549 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -2753,7 +2753,7 @@ igb_update_rx_offloads(IGBCore *core)
 
 if (core->has_vnet) {
 qemu_set_offload(qemu_get_queue(core->owner_nic)->peer,
- cso_state, 0, 0, 0, 0);
+ cso_state, 0, 0, 0, 0, 0, 0);
 }
 }
 
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 7102ec4817..d2311e7d6e 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -859,7 +859,9 @@ static void virtio_net_apply_guest_offloads(VirtIONet *n)
 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
-!!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
+!!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
+!!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
+!!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)));
 }
 
 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 5dfacb1098..886adae42b 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -1341,6 +1341,8 @@ static void vmxnet3_update_features(VMXNET3State *s)
  s->lro_supported,
  s->lro_supported,
  0,
+ 0,
+ 0,
  0);
 }
 }
diff --git a/include/net/net.h b/include/net/net.h
index 1448d00afb..b5ccfbbffb 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -58,7 +58,7 @@ typedef bool (HasVnetHdr)(NetClientState *);
 typedef bool (HasVnetHdrLen)(NetClientState *, int);
 typedef bool (GetUsingVnetHdr)(NetClientState *);
 typedef void (UsingVnetHdr)(NetClientState *, bool);
-typedef void (SetOffload)(NetClientState *, int, int, int, int, int);
+typedef void (SetOffload)(NetClientState *, int, int, int, int, int, int, int);
 typedef int (GetVnetHdrLen)(NetClientState *);
 typedef void (SetVnetHdrLen)(NetClientState *, int);
 typedef int (SetVnetLE)(NetClientState *, bool);
@@ -192,7 +192,7 @@ bool qemu_has_vnet_hdr_len(NetClientState *nc, int len);
 bool qemu_get_using_vnet_hdr(NetClientState *nc);
 void qemu_using_vnet_hdr(NetClientState *nc, bool enable);
 void qemu_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
-  int ecn, int ufo);
+  int ecn, int ufo, int uso4, int uso6);
 int qemu_get_vnet_hdr_len(NetClientState *nc);
 void qemu_set_vnet_hdr_len(NetClientState *nc, int len);
 int qemu_set_vnet_le(NetClientState *nc, bool is_le);
diff --git a/net/net.c b/net/net.c
index 6492ad530e..543e6dec43 100644
--- a/net/net.c
+++ b/net/net.c
@@ -532,13 +532,13 @@ void qemu_using_vnet_hdr(NetClientState *nc, bool enable)
 }
 
 void qemu_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
-  int ecn, int ufo)
+  int ecn, int ufo, int uso4, int uso6)
 {
 if (!nc || !nc->info->set_offload) {
 return;
 }
 
-nc->info->set_offload(nc, csum, tso4, tso6, ecn, ufo);
+nc->info->set_offload(nc, csum, tso4, tso6, ecn, ufo, uso4, uso6);
 }
 
 int qemu_get_vnet_hdr_len(NetClientState *nc)
diff --git a/net/tap-bsd.c b/net/tap-bsd.c
index 4c98fdd337..abd16a2ad2 100644
--- a/net/tap-bsd.c
+++ b/net/tap-bsd.c
@@ -232,7 +232,7 @@ int tap_fd_set_vnet_be(int fd, int is_be)
 }
 
 void tap_fd_set_offload(int fd, int csum, int tso4,
-int tso6, int

[PATCH 0/4] virtio-net: add USO feature (UDP segmentation offload)

2023-07-19 Thread Yuri Benditovich

Starting from 6.2 the kernel supports UDP segmentation offload, the
kernel uses GSO_UDP_L4 to mark packets with a UDP segmentation request.


Andrew Melnychenko (3):
  tap: Added USO support to tap device.
  virtio-net: Added USO flags to vhost support.
  virtio-net: Added uso check

Yuri Benditovich (1):
  virtio-net: added USO support

 hw/net/e1000e_core.c |  2 +-
 hw/net/igb_core.c|  2 +-
 hw/net/vhost_net.c   |  3 +++
 hw/net/virtio-net.c  | 35 ---
 hw/net/vmxnet3.c |  2 ++
 include/net/net.h|  7 +--
 net/net.c| 13 +++--
 net/tap-bsd.c|  7 ++-
 net/tap-linux.c  | 27 ---
 net/tap-linux.h  |  2 ++
 net/tap-solaris.c|  7 ++-
 net/tap-stub.c   |  7 ++-
 net/tap-win32.c  |  2 +-
 net/tap.c| 18 +++---
 net/tap_int.h|  4 +++-
 net/vhost-vdpa.c |  3 +++
 16 files changed, 121 insertions(+), 20 deletions(-)

-- 
2.34.3

[PATCH 4/4] virtio-net: Added uso check

2023-07-19 Thread Yuri Benditovich

From: Andrew Melnychenko 

Added tap uso check with stubs for non-Linux systems.

Signed-off-by: Yuri Benditovich 
Signed-off-by: Andrew Melnychenko 
---
 hw/net/virtio-net.c | 15 +++
 include/net/net.h   |  3 +++
 net/net.c   |  9 +
 net/tap-bsd.c   |  5 +
 net/tap-linux.c | 12 
 net/tap-solaris.c   |  5 +
 net/tap-stub.c  |  5 +
 net/tap.c   | 12 
 net/tap_int.h   |  1 +
 9 files changed, 67 insertions(+)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index e76cad923b..d950d3a77f 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -659,6 +659,15 @@ static int peer_has_ufo(VirtIONet *n)
 return n->has_ufo;
 }
 
+static int peer_has_uso(VirtIONet *n)
+{
+if (!peer_has_vnet_hdr(n)) {
+return 0;
+}
+
+return qemu_has_uso(qemu_get_queue(n->nic)->peer);
+}
+
 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
int version_1, int hash_report)
 {
@@ -808,6 +817,12 @@ static uint64_t virtio_net_get_features(VirtIODevice 
*vdev, uint64_t features,
 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
 }
 
+if (!peer_has_uso(n)) {
+virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
+virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
+virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
+}
+
 if (!get_vhost_net(nc->peer)) {
 return features;
 }
diff --git a/include/net/net.h b/include/net/net.h
index b5ccfbbffb..330d285930 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -54,6 +54,7 @@ typedef void (LinkStatusChanged)(NetClientState *);
 typedef void (NetClientDestructor)(NetClientState *);
 typedef RxFilterInfo *(QueryRxFilter)(NetClientState *);
 typedef bool (HasUfo)(NetClientState *);
+typedef bool (HasUso)(NetClientState *);
 typedef bool (HasVnetHdr)(NetClientState *);
 typedef bool (HasVnetHdrLen)(NetClientState *, int);
 typedef bool (GetUsingVnetHdr)(NetClientState *);
@@ -84,6 +85,7 @@ typedef struct NetClientInfo {
 QueryRxFilter *query_rx_filter;
 NetPoll *poll;
 HasUfo *has_ufo;
+HasUso *has_uso;
 HasVnetHdr *has_vnet_hdr;
 HasVnetHdrLen *has_vnet_hdr_len;
 GetUsingVnetHdr *get_using_vnet_hdr;
@@ -187,6 +189,7 @@ void qemu_set_info_str(NetClientState *nc,
const char *fmt, ...) G_GNUC_PRINTF(2, 3);
 void qemu_format_nic_info_str(NetClientState *nc, uint8_t macaddr[6]);
 bool qemu_has_ufo(NetClientState *nc);
+bool qemu_has_uso(NetClientState *nc);
 bool qemu_has_vnet_hdr(NetClientState *nc);
 bool qemu_has_vnet_hdr_len(NetClientState *nc, int len);
 bool qemu_get_using_vnet_hdr(NetClientState *nc);
diff --git a/net/net.c b/net/net.c
index 543e6dec43..b110e61f66 100644
--- a/net/net.c
+++ b/net/net.c
@@ -495,6 +495,15 @@ bool qemu_has_ufo(NetClientState *nc)
 return nc->info->has_ufo(nc);
 }
 
+bool qemu_has_uso(NetClientState *nc)
+{
+if (!nc || !nc->info->has_uso) {
+return false;
+}
+
+return nc->info->has_uso(nc);
+}
+
 bool qemu_has_vnet_hdr(NetClientState *nc)
 {
 if (!nc || !nc->info->has_vnet_hdr) {
diff --git a/net/tap-bsd.c b/net/tap-bsd.c
index abd16a2ad2..274ea7bd2c 100644
--- a/net/tap-bsd.c
+++ b/net/tap-bsd.c
@@ -212,6 +212,11 @@ int tap_probe_has_ufo(int fd)
 return 0;
 }
 
+int tap_probe_has_uso(int fd)
+{
+return 0;
+}
+
 int tap_probe_vnet_hdr_len(int fd, int len)
 {
 return 0;
diff --git a/net/tap-linux.c b/net/tap-linux.c
index 30fcca1bc2..c7e514ecb0 100644
--- a/net/tap-linux.c
+++ b/net/tap-linux.c
@@ -173,6 +173,18 @@ int tap_probe_has_ufo(int fd)
 return 1;
 }
 
+int tap_probe_has_uso(int fd)
+{
+unsigned offload;
+
+offload = TUN_F_CSUM | TUN_F_USO4 | TUN_F_USO6;
+
+if (ioctl(fd, TUNSETOFFLOAD, offload) < 0) {
+return 0;
+}
+return 1;
+}
+
 /* Verify that we can assign given length */
 int tap_probe_vnet_hdr_len(int fd, int len)
 {
diff --git a/net/tap-solaris.c b/net/tap-solaris.c
index a617a10e5c..08b13af512 100644
--- a/net/tap-solaris.c
+++ b/net/tap-solaris.c
@@ -216,6 +216,11 @@ int tap_probe_has_ufo(int fd)
 return 0;
 }
 
+int tap_probe_has_uso(int fd)
+{
+return 0;
+}
+
 int tap_probe_vnet_hdr_len(int fd, int len)
 {
 return 0;
diff --git a/net/tap-stub.c b/net/tap-stub.c
index ac8dfc03b4..4b24f61e3a 100644
--- a/net/tap-stub.c
+++ b/net/tap-stub.c
@@ -47,6 +47,11 @@ int tap_probe_has_ufo(int fd)
 return 0;
 }
 
+int tap_probe_has_uso(int fd)
+{
+return 0;
+}
+
 int tap_probe_vnet_hdr_len(int fd, int len)
 {
 return 0;
diff --git a/net/tap.c b/net/tap.c
index 14ea4ef26f..bcea8d03f9 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -57,6 +57,7 @@ typedef struct TAPState {
 bool write_poll;
 bool using_vnet_hdr;
 bool has_ufo;
+bool has_uso;
 bool enabled;
 VHostNetState *vhost_net;
 unsigned host_vnet_hdr_len;
@@ -2

[PATCH 3/4] virtio-net: added USO support

2023-07-19 Thread Yuri Benditovich

virtio-net can offer the USO features (TX, RX v4 and RX v6),
depending on the ability of the kernel TUN driver to support them.
These features must be explicitly enabled on the command line.

Signed-off-by: Yuri Benditovich 
---
 hw/net/virtio-net.c | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index d2311e7d6e..e76cad923b 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -796,6 +796,10 @@ static uint64_t virtio_net_get_features(VirtIODevice 
*vdev, uint64_t features,
 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
 
+virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
+virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
+virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
+
 virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
 }
 
@@ -864,14 +868,16 @@ static void virtio_net_apply_guest_offloads(VirtIONet *n)
 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)));
 }
 
-static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
+static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
 {
 static const uint64_t guest_offloads_mask =
 (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
 (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
 (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
 (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
-(1ULL << VIRTIO_NET_F_GUEST_UFO);
+(1ULL << VIRTIO_NET_F_GUEST_UFO)  |
+(1ULL << VIRTIO_NET_F_GUEST_USO4) |
+(1ULL << VIRTIO_NET_F_GUEST_USO6);
 
 return guest_offloads_mask & features;
 }
@@ -3924,6 +3930,12 @@ static Property virtio_net_properties[] = {
 DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
 DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
 DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
+DEFINE_PROP_BIT64("guest_uso4", VirtIONet, host_features,
+  VIRTIO_NET_F_GUEST_USO4, false),
+DEFINE_PROP_BIT64("guest_uso6", VirtIONet, host_features,
+  VIRTIO_NET_F_GUEST_USO6, false),
+DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
+  VIRTIO_NET_F_HOST_USO, false),
 DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
2.34.3

Re: [PATCH 3/5] qmp: Added the helper stamp check.

2023-02-28 Thread Yuri Benditovich

On Tue, Feb 28, 2023 at 8:05 PM Daniel P. Berrangé 
wrote:

> On Tue, Feb 28, 2023 at 11:56:27AM +0200, Yuri Benditovich wrote:
> > On Mon, Feb 20, 2023 at 11:50 AM Daniel P. Berrangé  >
> > wrote:
> >
> > > On Sun, Feb 19, 2023 at 06:20:58PM +0200, Andrew Melnychenko wrote:
> > > > Added a function to check the stamp in the helper.
> > > > eBPF helper should have a special symbol that generates during the
> build.
> > > > QEMU checks the helper and determines that it fits, so the helper
> > > > will produce proper output.
> > >
> > > I think this is quite limiting for in place upgrades.
> > >
> > > Consider this scenario
> > >
> > >  * Host has QEMU 8.1.0 installed
> > >  * VM is running QEMU 8.1.0
> > >  * QEMU 8.1.1 is released with a bug fix in the EBF program
> > >  * Host is upgraded to QEMU 8.1.1
> > >  * User attempts to hotplug a NIC to the running VM
> > >
> > > IIUC this last step is going to fail because we'll be loading
> > > the EBF program from 8.1.1 and so its hash is different from
> > > that expected by the QEMU 8.1.0 that the pre-existing VM is
> > > running.
> > >
> > > >   Indeed we did not take into account the in-place upgrade.
> >
> >
> >
> > > If some changes to the EBF program are not going to be back
> > > compatible from the POV of the QEMU process, should we instead
> > > be versioning the EBF program. eg so new QEMU will ship both
> > > the old and new versions of the EBF program.
> >
> > This does not seem to be an elegant option: QEMU theoretically can
> include
> > different eBPF programs but it hardly can interface with each one of
> them.
> > The code of QEMU (access to eBPF maps etc) includes header files which
> eBPF
> > of the day is being built with them.
> >
> > I see 2 options to address this issue (of course there are more)
> > 1. Build and install qemu-rss-helper- executable. Libvirt will
> always
> > have a correct name, so for the running instance it will use
> > qemu-rss-helper-, for the new instance it will use
> > qemu-rss-helper-
>
> We'll get an ever growing number of program variants we need to
> build & distribute with each new QEMU release.
>

New release of the qemu-rss-helper- will be created in fact only
when the eBPF binary is updated.
This does not happen on each release. But yes, this looks like versioning
of all the shared libraries.


>
> > 2. Build the helper executable and link it inside qemu as a blob. Libvirt
> > will always retrieve the executable to the temporary file name and use
> it.
> > So the retrieved helper will always be compatible with QEMU. I'm not sure
> > what is the most portable way to do that.
>
> QEMU is considered an untrusted process, so there's no way we're going
> to ask it to give us an ELF binary and then execute that in privileged
> context.
>
> > Does one of these seem suitable?
>
> Neither feels very appealing to me.
>
> I've been trying to understand the eBPF code we're dealing with in a
> little more detail.
>
> IIUC, QEMU, or rather the virtio-net  driver needs to receive one FD
> for the BPF program, and one or more FDs for the BPF maps that the
> program uses. Currently it uses 3 maps, so needs 3 map FDs on top of
> the program FD.
>
> The helper program that is proposed here calls ebpf_rss_load() to
> load the program and get back a struct which gives access to the
> 4 FDs, which are then sent to the mgmt app, which forwards them
> onto QEMU.
>
> The ebpf_rss_load() method is making use of various structs that
> are specific to the RSS program implementation, but does not seems
> to do anything especially interesting.  It calls into rss_bpf__open()
> which eventually gets around to calling rss_bpf__create_skeleton
> which is where the interesting stuff happens.
>
> This rss_bpf__create_skeleton() method is implemented in terms of
> totally generic libbpf APIs, and has the actual blob that is the
> BPF program.
>
> Looking at what this does, I feel it should be trivial for a mgmt
> app to implement equivalent logic to rss_bpf__create_skeleton in a
> generic manner, if we could just expose the program blob and the
> map names to the mgmt app. eg a simple json file
>
>   {
>  "maps": [
> "tap_rss_map_configurations",
> "tap_rss_map_indirection_table",
> "tap_rss_map_toeplitz_key",
>  ],
>  "program": "the big blob encoded in base64..."
>   }
>
> if we installed that f

Re: [PATCH 3/5] qmp: Added the helper stamp check.

2023-02-28 Thread Yuri Benditovich

On Mon, Feb 20, 2023 at 11:50 AM Daniel P. Berrangé 
wrote:

> On Sun, Feb 19, 2023 at 06:20:58PM +0200, Andrew Melnychenko wrote:
> > Added a function to check the stamp in the helper.
> > eBPF helper should have a special symbol that generates during the build.
> > QEMU checks the helper and determines that it fits, so the helper
> > will produce proper output.
>
> I think this is quite limiting for in place upgrades.
>
> Consider this scenario
>
>  * Host has QEMU 8.1.0 installed
>  * VM is running QEMU 8.1.0
>  * QEMU 8.1.1 is released with a bug fix in the EBF program
>  * Host is upgraded to QEMU 8.1.1
>  * User attempts to hotplug a NIC to the running VM
>
> IIUC this last step is going to fail because we'll be loading
> the EBF program from 8.1.1 and so its hash is different from
> that expected by the QEMU 8.1.0 that the pre-existing VM is
> running.
>
>   Indeed we did not take into account the in-place upgrade.



> If some changes to the EBF program are not going to be back
> compatible from the POV of the QEMU process, should we instead
> be versioning the EBF program. eg so new QEMU will ship both
> the old and new versions of the EBF program.
>
>
This does not seem to be an elegant option: QEMU can theoretically include
different eBPF programs, but it can hardly interface with each one of them.
The QEMU code (access to eBPF maps, etc.) includes the same header files that
the current eBPF program is built with.

I see 2 options to address this issue (of course there are more)
1. Build and install qemu-rss-helper- executable. Libvirt will always
have a correct name, so for the running instance it will use
qemu-rss-helper-, for the new instance it will use
qemu-rss-helper-
2. Build the helper executable and link it inside qemu as a blob. Libvirt
will always retrieve the executable to the temporary file name and use it.
So the retrieved helper will always be compatible with QEMU. I'm not sure
what is the most portable way to do that.

Daniel,
Does one of these seem suitable?


> With regards,
> Daniel
> --
> |: https://berrange.com  -o-
> https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o-
> https://fstop138.berrange.com :|
> |: https://entangle-photo.org-o-
> https://www.instagram.com/dberrange :|
>
>

Re: [PATCH 3/3] contrib/elf2dmp: add PE name check and Windows Server 2022 support

2021-11-04 Thread Yuri Benditovich

On Wed, Nov 3, 2021 at 6:13 PM Viktor Prutyanov
 wrote:
>
> Since its inception elf2dmp has checked MZ signatures within an
> address space above IDT[0] interrupt vector and took first PE image
> found as Windows Kernel.
> But in Windows Server 2022 memory dump this address space range is
> full of invalid PE fragments and the tool must check that PE image
> is 'ntoskrnl.exe' actually.
> So, introduce additional validation by checking image name from
> Export Directory against 'ntoskrnl.exe'.
>
> Signed-off-by: Viktor Prutyanov 
Tested-by: Yuri Benditovich 
> ---
>  contrib/elf2dmp/main.c | 28 ++--
>  contrib/elf2dmp/pe.h   | 15 +++
>  2 files changed, 41 insertions(+), 2 deletions(-)
>
> diff --git a/contrib/elf2dmp/main.c b/contrib/elf2dmp/main.c
> index a62fde23cc..04cdd07292 100644
> --- a/contrib/elf2dmp/main.c
> +++ b/contrib/elf2dmp/main.c
> @@ -17,6 +17,7 @@
>
> #define SYM_URL_BASE "https://msdl.microsoft.com/download/symbols/"
> #define PDB_NAME "ntkrnlmp.pdb"
> +#define PE_NAME "ntoskrnl.exe"
>
>  #define INITIAL_MXCSR   0x1f80
>
> @@ -399,6 +400,25 @@ static int write_dump(struct pa_space *ps,
>  return fclose(dmp_file);
>  }
>
> +static bool pe_check_export_name(uint64_t base, void *start_addr,
> +struct va_space *vs)
> +{
> +IMAGE_EXPORT_DIRECTORY export_dir;
> +const char *pe_name;
> +
> +if (pe_get_data_dir_entry(base, start_addr, IMAGE_FILE_EXPORT_DIRECTORY,
> +&export_dir, sizeof(export_dir), vs)) {
> +return false;
> +}
> +
> +pe_name = va_space_resolve(vs, base + export_dir.Name);
> +if (!pe_name) {
> +return false;
> +}
> +
> +return !strcmp(pe_name, PE_NAME);
> +}
> +
>  static int pe_get_pdb_symstore_hash(uint64_t base, void *start_addr,
>  char *hash, struct va_space *vs)
>  {
> @@ -483,6 +503,7 @@ int main(int argc, char *argv[])
>  uint64_t KdDebuggerDataBlock;
>  KDDEBUGGER_DATA64 *kdbg;
>  uint64_t KdVersionBlock;
> +bool kernel_found = false;
>
>  if (argc != 3) {
>  eprintf("usage:\n\t%s elf_file dmp_file\n", argv[0]);
> @@ -530,11 +551,14 @@ int main(int argc, char *argv[])
>  }
>
>  if (*(uint16_t *)nt_start_addr == 0x5a4d) { /* MZ */
> -break;
> +if (pe_check_export_name(KernBase, nt_start_addr, &vs)) {
> +kernel_found = true;
> +break;
> +}
>  }
>  }
>
> -if (!nt_start_addr) {
> +if (!kernel_found) {
>  eprintf("Failed to find NT kernel image\n");
>  err = 1;
>  goto out_ps;
> diff --git a/contrib/elf2dmp/pe.h b/contrib/elf2dmp/pe.h
> index 807d006364..71126af1ac 100644
> --- a/contrib/elf2dmp/pe.h
> +++ b/contrib/elf2dmp/pe.h
> @@ -88,6 +88,20 @@ typedef struct IMAGE_NT_HEADERS64 {
>  IMAGE_OPTIONAL_HEADER64 OptionalHeader;
>  } __attribute__ ((packed)) IMAGE_NT_HEADERS64;
>
> +typedef struct IMAGE_EXPORT_DIRECTORY {
> +uint32_tCharacteristics;
> +uint32_tTimeDateStamp;
> +uint16_tMajorVersion;
> +uint16_tMinorVersion;
> +uint32_tName;
> +uint32_tBase;
> +uint32_tNumberOfFunctions;
> +uint32_tNumberOfNames;
> +uint32_tAddressOfFunctions;
> +uint32_tAddressOfNames;
> +uint32_tAddressOfNameOrdinals;
> +} __attribute__ ((packed)) IMAGE_EXPORT_DIRECTORY;
> +
>  typedef struct IMAGE_DEBUG_DIRECTORY {
>  uint32_t Characteristics;
>  uint32_t TimeDateStamp;
> @@ -102,6 +116,7 @@ typedef struct IMAGE_DEBUG_DIRECTORY {
>  #define IMAGE_DEBUG_TYPE_CODEVIEW   2
>  #endif
>
> +#define IMAGE_FILE_EXPORT_DIRECTORY 0
>  #define IMAGE_FILE_DEBUG_DIRECTORY  6
>
>  typedef struct guid_t {
> --
> 2.31.1
>

Re: [PATCH 4/5] ebpf_rss_helper: Added helper for eBPF RSS.

2021-09-09 Thread Yuri Benditovich

On Thu, Sep 9, 2021 at 4:16 AM Jason Wang  wrote:
>
> On Thu, Sep 9, 2021 at 8:00 AM Yuri Benditovich
>  wrote:
> >
> > On Wed, Sep 8, 2021 at 6:45 AM Jason Wang  wrote:
> > >
> > > On Tue, Sep 7, 2021 at 6:40 PM Yuri Benditovich
> > >  wrote:
> > > >
> > > > On Wed, Sep 1, 2021 at 9:42 AM Jason Wang  wrote:
> > > > >
> > > > >
> > > > > 在 2021/8/31 上午1:07, Yuri Benditovich 写道:
> > > > > > On Fri, Aug 20, 2021 at 6:41 AM Jason Wang  
> > > > > > wrote:
> > > > > >>
> > > > > >> 在 2021/7/13 下午11:37, Andrew Melnychenko 写道:
> > > > > >>> Helper program. Loads eBPF RSS program and maps and passes them 
> > > > > >>> through unix socket.
> > > > > >>> Libvirt may launch this helper and pass eBPF fds to qemu 
> > > > > >>> virtio-net.
> > > > > >>
> > > > > >> I wonder if this can be done as helper for TAP/bridge.
> > > > > >>
> > > > > >> E.g it's the qemu to launch those helper with set-uid.
> > > > > >>
> > > > > >> Then libvirt won't even need to care about that?
> > > > > >>
> > > > > > There are pros and cons for such a solution with set-uid.
> > > > > >  From my point of view one of the cons is that set-uid is efficient
> > > > > > only at install time so the coexistence of different qemu builds 
> > > > > > (and
> > > > > > different helpers for each one) is kind of problematic.
> > > > > > With the current solution this does not present any problem: the
> > > > > > developer can have several different builds, each one automatically
> > > > > > has its own helper and there is no conflict between these builds and
> > > > > > between these builds and installed qemu package. Changing the
> > > > > > 'emulator' in the libvirt profile automatically brings the proper
> > > > > > helper to work.
> > > > >
> > > > >
> > > > > I'm not sure I get you here. We can still have default/sample helper 
> > > > > to
> > > > > make sure it works for different builds.
> > > > >
> > > > > If we can avoid the involvement of libvirt, that would be better.
> > > >
> > > > Hi Jason,
> > > >
> > > > Indeed I did not get the idea, can you please explain it in more
> > > > details (as detailed as possible to avoid future misunderstanding),
> > > > especially how exactly we can use the set-uid and what is the 'default' 
> > > > helper.
> > > > We also would prefer to do everything from qemu but we do not see how
> > > > we can do that.
> > >
> > >
> > Some more questions to understand the idea better:
> > > Something like:
> > >
> > > 1) -netdev tap,rss_helper=/path/to/name
> >
> > So, on each editing of 'emulator' in the xml  the helper path should
> > be set manually or be default?
>
> It could done manually, or we can have a default path.
>
> >
> > > 2) having a sample/default helper implemented in Qemu
> >
> > Does it mean the default helper is the code in the qemu (without
> > running additional executable, like it does today)
>
> Yes.
If the "default helper" is just a keyword and it is like what we have
today (i.e. part of qemu) it can't work under libvirt and should never
be used by libvirt.
>
>  or this is qemu
> > itself with dedicated command line?
> > As far as I remember Daniel had strong objections of ever running qemu
> > with capabilities
>
> Qemu won't run with capabilities but the helper.
So under libvirt the helper is always separate executable and not
"default helper"

>
> >
> > > 3) we can introduce something special path like "default", then if
> > > -netdev tap,rss_helper="default" is specified, qemu will use the
> > > sample helper
> >
> > Probably this is not so important but the rss helper and rss in
> > general has no relation to netdev, much more they are related to
> > virtio-net
>
> So I think the reason for this is that we currently only support
> eBPF/RSS for tap.

This is just because only tap supports respective ioctls.

>
> >
> > >
> > > So we have:
> > > 1) set set-uid for the

Re: [PATCH 4/5] ebpf_rss_helper: Added helper for eBPF RSS.

2021-09-08 Thread Yuri Benditovich

On Wed, Sep 8, 2021 at 6:45 AM Jason Wang  wrote:
>
> On Tue, Sep 7, 2021 at 6:40 PM Yuri Benditovich
>  wrote:
> >
> > On Wed, Sep 1, 2021 at 9:42 AM Jason Wang  wrote:
> > >
> > >
> > > 在 2021/8/31 上午1:07, Yuri Benditovich 写道:
> > > > On Fri, Aug 20, 2021 at 6:41 AM Jason Wang  wrote:
> > > >>
> > > >> 在 2021/7/13 下午11:37, Andrew Melnychenko 写道:
> > > >>> Helper program. Loads eBPF RSS program and maps and passes them 
> > > >>> through unix socket.
> > > >>> Libvirt may launch this helper and pass eBPF fds to qemu virtio-net.
> > > >>
> > > >> I wonder if this can be done as helper for TAP/bridge.
> > > >>
> > > >> E.g it's the qemu to launch those helper with set-uid.
> > > >>
> > > >> Then libvirt won't even need to care about that?
> > > >>
> > > > There are pros and cons for such a solution with set-uid.
> > > >  From my point of view one of the cons is that set-uid is efficient
> > > > only at install time so the coexistence of different qemu builds (and
> > > > different helpers for each one) is kind of problematic.
> > > > With the current solution this does not present any problem: the
> > > > developer can have several different builds, each one automatically
> > > > has its own helper and there is no conflict between these builds and
> > > > between these builds and installed qemu package. Changing the
> > > > 'emulator' in the libvirt profile automatically brings the proper
> > > > helper to work.
> > >
> > >
> > > I'm not sure I get you here. We can still have default/sample helper to
> > > make sure it works for different builds.
> > >
> > > If we can avoid the involvement of libvirt, that would be better.
> >
> > Hi Jason,
> >
> > Indeed I did not get the idea, can you please explain it in more
> > details (as detailed as possible to avoid future misunderstanding),
> > especially how exactly we can use the set-uid and what is the 'default' 
> > helper.
> > We also would prefer to do everything from qemu but we do not see how
> > we can do that.
>
>
Some more questions to understand the idea better:
> Something like:
>
> 1) -netdev tap,rss_helper=/path/to/name

So, on each editing of 'emulator' in the xml  the helper path should
be set manually or be default?

> 2) having a sample/default helper implemented in Qemu

Does it mean the default helper is the code in the qemu (without
running additional executable, like it does today) or this is qemu
itself with dedicated command line?
As far as I remember Daniel had strong objections of ever running qemu
with capabilities

> 3) we can introduce something special path like "default", then if
> -netdev tap,rss_helper="default" is specified, qemu will use the
> sample helper

Probably this is not so important, but the RSS helper and RSS in
general have no relation to netdev; they are much more closely related
to virtio-net.

>
> So we have:
> 1) set set-uid for the helper
Who sets set-uid on the helper binary, and when? Only the installer or
libvirt can do that, correct?

> 2) libvirt may just choose to launch the default helper
All this discussion is to avoid launching the helper from libvirt, correct?

>
> >
> > Our main points (what should be addressed):
> > - qemu should be able to load ebpf and use the maps when it runs from
> > libvirt (without special caps) and standalone (with caps)
>
> This is solved by leaving the privileged operations to the helper with 
> set-uid.
>
> > - it is possible that there are different qemu builds on the machine,
> > one of them might be installed, their ebpf's might be different and
> > the interface between qemu and ebpf (exact content of maps and number
> > of maps)
>
> We can use different helpers in this way.
>
> > - qemu configures the RSS dynamically according to the commands
> > provided by the guest
>
> Consider we decided to use mmap() based maps, this is not an issue.
>
> Or am I missing something?
>
> Thanks
>
> >
> > Thanks in advance
> > Yuri
> >
> > >
> > > Thanks
> > >
> > >
> > > >
> > > >>> Also, libbpf dependency now exclusively for Linux.
> > > >>> Libbpf is used for eBPF RSS steering, which is supported only by 
> > > >>> Linux TAP.
> > > >>> There is no reason yet to build eBPF loader and helper for non Linux 
>

Re: [PATCH 4/5] ebpf_rss_helper: Added helper for eBPF RSS.

2021-09-07 Thread Yuri Benditovich

On Wed, Sep 1, 2021 at 9:42 AM Jason Wang  wrote:
>
>
> 在 2021/8/31 上午1:07, Yuri Benditovich 写道:
> > On Fri, Aug 20, 2021 at 6:41 AM Jason Wang  wrote:
> >>
> >> 在 2021/7/13 下午11:37, Andrew Melnychenko 写道:
> >>> Helper program. Loads eBPF RSS program and maps and passes them through 
> >>> unix socket.
> >>> Libvirt may launch this helper and pass eBPF fds to qemu virtio-net.
> >>
> >> I wonder if this can be done as helper for TAP/bridge.
> >>
> >> E.g it's the qemu to launch those helper with set-uid.
> >>
> >> Then libvirt won't even need to care about that?
> >>
> > There are pros and cons for such a solution with set-uid.
> >  From my point of view one of the cons is that set-uid is efficient
> > only at install time so the coexistence of different qemu builds (and
> > different helpers for each one) is kind of problematic.
> > With the current solution this does not present any problem: the
> > developer can have several different builds, each one automatically
> > has its own helper and there is no conflict between these builds and
> > between these builds and installed qemu package. Changing the
> > 'emulator' in the libvirt profile automatically brings the proper
> > helper to work.
>
>
> I'm not sure I get you here. We can still have default/sample helper to
> make sure it works for different builds.
>
> If we can avoid the involvement of libvirt, that would be better.

Hi Jason,

Indeed I did not get the idea. Can you please explain it in more
detail (as detailed as possible to avoid future misunderstanding),
especially how exactly we can use the set-uid and what the 'default' helper is?
We also would prefer to do everything from qemu but we do not see how
we can do that.

Our main points (what should be addressed):
- qemu should be able to load ebpf and use the maps when it runs from
libvirt (without special caps) and standalone (with caps)
- it is possible that there are different qemu builds on the machine,
one of them might be installed, their ebpf's might be different and
the interface between qemu and ebpf (exact content of maps and number
of maps)
- qemu configures the RSS dynamically according to the commands
provided by the guest

Thanks in advance
Yuri

>
> Thanks
>
>
> >
> >>> Also, libbpf dependency now exclusively for Linux.
> >>> Libbpf is used for eBPF RSS steering, which is supported only by Linux 
> >>> TAP.
> >>> There is no reason yet to build eBPF loader and helper for non Linux 
> >>> systems,
> >>> even if libbpf is present.
> >>>
> >>> Signed-off-by: Andrew Melnychenko 
> >>> ---
> >>>ebpf/qemu-ebpf-rss-helper.c | 130 
> >>>meson.build |  37 ++
> >>>2 files changed, 154 insertions(+), 13 deletions(-)
> >>>create mode 100644 ebpf/qemu-ebpf-rss-helper.c
> >>>
> >>> diff --git a/ebpf/qemu-ebpf-rss-helper.c b/ebpf/qemu-ebpf-rss-helper.c
> >>> new file mode 100644
> >>> index 00..fe68758f57
> >>> --- /dev/null
> >>> +++ b/ebpf/qemu-ebpf-rss-helper.c
> >>> @@ -0,0 +1,130 @@
> >>> +/*
> >>> + * eBPF RSS Helper
> >>> + *
> >>> + * Developed by Daynix Computing LTD (http://www.daynix.com)
> >>> + *
> >>> + * Authors:
> >>> + *  Andrew Melnychenko 
> >>> + *
> >>> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> >>> + * the COPYING file in the top-level directory.
> >>> + *
> >>> + * Description: This is helper program for libvirtd.
> >>> + *  It loads eBPF RSS program and passes fds through unix 
> >>> socket.
> >>> + *  Built by meson, target - 'qemu-ebpf-rss-helper'.
> >>> + */
> >>> +
> >>> +#include 
> >>> +#include 
> >>> +#include 
> >>> +#include 
> >>> +#include 
> >>> +#include 
> >>> +#include 
> >>> +#include 
> >>> +
> >>> +#include "ebpf_rss.h"
> >>> +
> >>> +#include "qemu-helper-stamp.h"
> >>> +
> >>> +void QEMU_HELPER_STAMP(void) {}
> >>> +
> >>> +static int send_fds(int socket, int *fds, int n)
> >>> +{
> >>> +struct msghdr msg = {};
> >>> +struct cmsghdr *cmsg = NULL;
> >>> +char buf[CMSG_SPACE(n * s

Re: [PATCH 4/5] ebpf_rss_helper: Added helper for eBPF RSS.

2021-08-30 Thread Yuri Benditovich

On Fri, Aug 20, 2021 at 6:41 AM Jason Wang  wrote:
>
>
> 在 2021/7/13 下午11:37, Andrew Melnychenko 写道:
> > Helper program. Loads eBPF RSS program and maps and passes them through 
> > unix socket.
> > Libvirt may launch this helper and pass eBPF fds to qemu virtio-net.
>
>
> I wonder if this can be done as helper for TAP/bridge.
>
> E.g it's the qemu to launch those helper with set-uid.
>
> Then libvirt won't even need to care about that?
>

There are pros and cons for such a solution with set-uid.
From my point of view one of the cons is that set-uid is efficient
only at install time so the coexistence of different qemu builds (and
different helpers for each one) is kind of problematic.
With the current solution this does not present any problem: the
developer can have several different builds, each one automatically
has its own helper and there is no conflict between these builds and
between these builds and installed qemu package. Changing the
'emulator' in the libvirt profile automatically brings the proper
helper to work.

>
> > Also, libbpf dependency now exclusively for Linux.
> > Libbpf is used for eBPF RSS steering, which is supported only by Linux TAP.
> > There is no reason yet to build eBPF loader and helper for non Linux 
> > systems,
> > even if libbpf is present.
> >
> > Signed-off-by: Andrew Melnychenko 
> > ---
> >   ebpf/qemu-ebpf-rss-helper.c | 130 
> >   meson.build |  37 ++
> >   2 files changed, 154 insertions(+), 13 deletions(-)
> >   create mode 100644 ebpf/qemu-ebpf-rss-helper.c
> >
> > diff --git a/ebpf/qemu-ebpf-rss-helper.c b/ebpf/qemu-ebpf-rss-helper.c
> > new file mode 100644
> > index 00..fe68758f57
> > --- /dev/null
> > +++ b/ebpf/qemu-ebpf-rss-helper.c
> > @@ -0,0 +1,130 @@
> > +/*
> > + * eBPF RSS Helper
> > + *
> > + * Developed by Daynix Computing LTD (http://www.daynix.com)
> > + *
> > + * Authors:
> > + *  Andrew Melnychenko 
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2.  See
> > + * the COPYING file in the top-level directory.
> > + *
> > + * Description: This is helper program for libvirtd.
> > + *  It loads eBPF RSS program and passes fds through unix 
> > socket.
> > + *  Built by meson, target - 'qemu-ebpf-rss-helper'.
> > + */
> > +
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +
> > +#include "ebpf_rss.h"
> > +
> > +#include "qemu-helper-stamp.h"
> > +
> > +void QEMU_HELPER_STAMP(void) {}
> > +
> > +static int send_fds(int socket, int *fds, int n)
> > +{
> > +struct msghdr msg = {};
> > +struct cmsghdr *cmsg = NULL;
> > +char buf[CMSG_SPACE(n * sizeof(int))];
> > +char dummy_buffer = 0;
> > +struct iovec io = { .iov_base = &dummy_buffer,
> > +.iov_len = sizeof(dummy_buffer) };
> > +
> > +memset(buf, 0, sizeof(buf));
> > +
> > +msg.msg_iov = &io;
> > +msg.msg_iovlen = 1;
> > +msg.msg_control = buf;
> > +msg.msg_controllen = sizeof(buf);
> > +
> > +cmsg = CMSG_FIRSTHDR(&msg);
> > +cmsg->cmsg_level = SOL_SOCKET;
> > +cmsg->cmsg_type = SCM_RIGHTS;
> > +cmsg->cmsg_len = CMSG_LEN(n * sizeof(int));
> > +
> > +memcpy(CMSG_DATA(cmsg), fds, n * sizeof(int));
> > +
> > +return sendmsg(socket, &msg, 0);
> > +}
> > +
> > +static void print_help_and_exit(const char *prog, int exitcode)
> > +{
> > +fprintf(stderr, "%s - load eBPF RSS program for qemu and pass eBPF fds"
> > +" through unix socket.\n", prog);
> > +fprintf(stderr, "\t--fd , -f  - unix socket file descriptor"
> > +" used to pass eBPF fds.\n");
> > +fprintf(stderr, "\t--help, -h - this help.\n");
> > +exit(exitcode);
> > +}
> > +
> > +int main(int argc, char **argv)
> > +{
> > +char *fd_string = NULL;
> > +int unix_fd = 0;
> > +struct EBPFRSSContext ctx = {};
> > +int fds[EBPF_RSS_MAX_FDS] = {};
> > +int ret = -1;
> > +
> > +for (;;) {
> > +int c;
> > +static struct option long_options[] = {
> > +{"help",  no_argument, 0, 'h'},
> > +{"fd",  required_argument, 0, 'f'},
> > +{0, 0, 0, 0}
> > +};
> > +c = getopt_long(argc, argv, "hf:",
> > +long_options, NULL);
> > +
> > +if (c == -1) {
> > +break;
> > +}
> > +
> > +switch (c) {
> > +case 'f':
> > +fd_string = optarg;
> > +break;
> > +case 'h':
> > +default:
> > +print_help_and_exit(argv[0],
> > +c == 'h' ? EXIT_SUCCESS : EXIT_FAILURE);
> > +}
> > +}
> > +
> > +if (!fd_string) {
> > +fprintf(stderr, "Unix file descriptor not present.\n");
> > +print_help_and_exit(argv[0], EXIT_FAILURE);
> > +}
> > +
> > +unix_fd = atoi(fd_string);
> > +
> > +if (!unix_fd) {
> > +

Re: [PATCH 5/5] qmp: Added qemu-ebpf-rss-path command.

2021-08-30 Thread Yuri Benditovich

On Mon, Aug 30, 2021 at 11:14 AM Markus Armbruster  wrote:
>
> Yuri Benditovich  writes:
>
> > On Mon, Aug 30, 2021 at 9:10 AM Markus Armbruster  wrote:
> >>
> >> Yuri Benditovich  writes:
> >>
> >> > On Tue, Aug 24, 2021 at 9:41 AM Markus Armbruster  
> >> > wrote:
> >> >>
> >> >> Andrew Melnichenko  writes:
> >> >>
> >> >> > Hi,
> >> >> >
> >> >> >> The helper may or may not be installed at the path compiled into 
> >> >> >> QEMU.
> >> >> >>
> >> >> > Yes, so the helper will not be called - QEMU will try to initiate 
> >> >> > eBPF RSS
> >> >> > or use "in-qemu" RSS.
> >> >>
> >> >> My point is: the proposed command's mission is to help the management
> >> >> application run the right helper.  However, its advice is *unreliable*.
> >> >> It may point to the wrong helper, or to nothing at all.  The right
> >> >> helper may still exist elsewhere.
> >> >
> >> > Hi Markus,
> >> > Indeed the intention of this command is to return the proper helper.
> >> > Especially in the case of RSS helper this is *reliable* advice and it
> >> > points to the helper that was built together with QEMU, i.e. with the
> >> > same headers.
> >> > This was discussed earlier, for example in
> >> > https://lists.nongnu.org/archive/html/qemu-devel/2021-06/msg02248.html
> >> >
> >> >>
> >> >> I suspect you're trying to address the problem at the wrong level.
> >> >
> >> > What is the proper solution for the problem from your point of view?
> >>
> >> I'll explain in more detail, but first I'd like you to answer my
> >> question below.
> >>
> >> >> Similar versioning issues exist with other helpers.  We've been doing
> >> >> fine without QEMU providing unreliable advice on where they might sit in
> >> >> the file system.  What makes this one different?
> >> >
> >> > This one is required to be *fully synchronized* with the existing build 
> >> > of QEMU.
> >> > Other helpers are probably less restrictive and do not have common
> >> > structures definitions with the QEMU, otherwise they would face the
> >> > same problem.
> >> >
> >> >>
> >> >> >> What happens when you use the wrong helper?
> >> >
> >> > Our intention is that libvirt should never use the wrong RSS helper.
> >> > But it does not have any ability to check which helper is compatible
> >> > with the QEMU.
> >> > QEMU can easily recognize the correct one.
> >>
> >> You did not actually answer my question :)
> >>
> >> So let's try again: if libvirt does use the wrong RSS helper, how does
> >> the system behave?
> >
> > The receive-side scaling may work incorrectly, i.e. finally may move
> > incoming packets to a virtqueue different than expected one.
>
> Then I'm confused about the purpose of "the stamp" mentioned below.  Can
> you enlighten me?

The stamp is a string (common for qemu executable and RSS helper
executable during build) that qemu can later retrieve from the helper
in run-time and ensure this helper is fully compatible with this build
of qemu (in terms of eBPF operation). The helper is built with the
same C headers (related to ebpf operation) as the qemu, the qemu is
able to receive file descriptors created by the helper (of ebpf
program and ebpf data structure's maps) from libvirt and deal with
them as if it has created them.

>
> >
> >>
> >> >> >>
> >> >> > UB - in most cases, eBPF program will work with wrong configurations.
> >> >> > That's why the stamp was added.
> >> >> >
> >> >> > query-helper-paths checks the stamp only for RSS helper.
> >> >>
> >> >> I have no idea what you're talking about :)
> >> >>
> >> >> My best guess is that you're trying to tell me that attempting to work
> >> >> with the wrong helper will fail cleanly due to some stamp check.  That
> >> >> would be nice.
>

Re: [PATCH 5/5] qmp: Added qemu-ebpf-rss-path command.

2021-08-30 Thread Yuri Benditovich

On Mon, Aug 30, 2021 at 9:10 AM Markus Armbruster  wrote:
>
> Yuri Benditovich  writes:
>
> > On Tue, Aug 24, 2021 at 9:41 AM Markus Armbruster  wrote:
> >>
> >> Andrew Melnichenko  writes:
> >>
> >> > Hi,
> >> >
> >> >> The helper may or may not be installed at the path compiled into QEMU.
> >> >>
> >> > Yes, so the helper will not be called - QEMU will try to initiate eBPF 
> >> > RSS
> >> > or use "in-qemu" RSS.
> >>
> >> My point is: the proposed command's mission is to help the management
> >> application run the right helper.  However, its advice is *unreliable*.
> >> It may point to the wrong helper, or to nothing at all.  The right
> >> helper may still exist elsewhere.
> >
> > Hi Markus,
> > Indeed the intention of this command is to return the proper helper.
> > Especially in the case of RSS helper this is *reliable* advice and it
> > points to the helper that was built together with QEMU, i.e. with the
> > same headers.
> > This was discussed earlier, for example in
> > https://lists.nongnu.org/archive/html/qemu-devel/2021-06/msg02248.html
> >
> >>
> >> I suspect you're trying to address the problem at the wrong level.
> >
> > What is the proper solution for the problem from your point of view?
>
> I'll explain in more detail, but first I'd like you to answer my
> question below.
>
> >> Similar versioning issues exist with other helpers.  We've been doing
> >> fine without QEMU providing unreliable advice on where they might sit in
> >> the file system.  What makes this one different?
> >
> > This one is required to be *fully synchronized* with the existing build of 
> > QEMU.
> > Other helpers are probably less restrictive and do not have common
> > structures definitions with the QEMU, otherwise they would face the
> > same problem.
> >
> >>
> >> >> What happens when you use the wrong helper?
> >
> > Our intention is that libvirt should never use the wrong RSS helper.
> > But it does not have any ability to check which helper is compatible
> > with the QEMU.
> > QEMU can easily recognize the correct one.
>
> You did not actually answer my question :)
>
> So let's try again: if libvirt does use the wrong RSS helper, how does
> the system behave?

The receive-side scaling may work incorrectly, i.e. it may end up steering
incoming packets to a virtqueue other than the expected one.

>
> >> >>
> >> > UB - in most cases, eBPF program will work with wrong configurations.
> >> > That's why the stamp was added.
> >> >
> >> > query-helper-paths checks the stamp only for RSS helper.
> >>
> >> I have no idea what you're talking about :)
> >>
> >> My best guess is that you're trying to tell me that attempting to work
> >> with the wrong helper will fail cleanly due to some stamp check.  That
> >> would be nice.
> >>
>

Re: [PATCH 5/5] qmp: Added qemu-ebpf-rss-path command.

2021-08-29 Thread Yuri Benditovich

On Tue, Aug 24, 2021 at 9:41 AM Markus Armbruster  wrote:
>
> Andrew Melnichenko  writes:
>
> > Hi,
> >
> >> The helper may or may not be installed at the path compiled into QEMU.
> >>
> > Yes, so the helper will not be called - QEMU will try to initiate eBPF RSS
> > or use "in-qemu" RSS.
>
> My point is: the proposed command's mission is to help the management
> application run the right helper.  However, its advice is *unreliable*.
> It may point to the wrong helper, or to nothing at all.  The right
> helper may still exist elsewhere.

Hi Markus,
Indeed the intention of this command is to return the proper helper.
Especially in the case of RSS helper this is *reliable* advice and it
points to the helper that was built together with QEMU, i.e. with the
same headers.
This was discussed earlier, for example in
https://lists.nongnu.org/archive/html/qemu-devel/2021-06/msg02248.html

>
> I suspect you're trying to address the problem at the wrong level.

What is the proper solution for the problem from your point of view?

>
> Similar versioning issues exist with other helpers.  We've been doing
> fine without QEMU providing unreliable advice on where they might sit in
> the file system.  What makes this one different?

This one is required to be *fully synchronized* with the existing build of QEMU.
Other helpers are probably less restrictive and do not have common
structures definitions with the QEMU, otherwise they would face the
same problem.

>
> >> What happens when you use the wrong helper?

Our intention is that libvirt should never use the wrong RSS helper.
But it does not have any ability to check which helper is compatible
with the QEMU.
QEMU can easily recognize the correct one.

> >>
> > UB - in most cases, eBPF program will work with wrong configurations.
> > That's why the stamp was added.
> >
> > query-helper-paths checks the stamp only for RSS helper.
>
> I have no idea what you're talking about :)
>
> My best guess is that you're trying to tell me that attempting to work
> with the wrong helper will fail cleanly due to some stamp check.  That
> would be nice.
>

Re: [PATCH 0/5] ebpf: Added ebpf helper for libvirtd.

2021-08-16 Thread Yuri Benditovich

Jason,
Can you please review the series?

Thanks,
Yuri

On Thu, Jul 22, 2021 at 11:38 AM Andrew Melnichenko  wrote:
>
> ping
>
> On Tue, Jul 13, 2021 at 6:38 PM Andrew Melnychenko  wrote:
>>
>> Libvirt usually launches qemu with strict permissions.
>> To enable eBPF RSS steering, qemu-ebpf-rss-helper was added.
>>
>> Added property "ebpf_rss_fds" for "virtio-net" that allows to
>> initialize eBPF RSS context with passed program & maps fds.
>>
>> Added qemu-ebpf-rss-helper - simple helper that loads eBPF
>> context and passes fds through unix socket.
>> Libvirt should call the helper and pass fds to qemu through
>> "ebpf_rss_fds" property.
>>
>> Added explicit target OS check for libbpf dependency in meson.
>> eBPF RSS works only with Linux TAP, so there is no reason to
>> build eBPF loader/helper for non-Linux.
>>
>> Changed Qemu updates eBPF maps to array mmaping. Mmaping allows
>> bypassing unprivileged BPF map update. Also, instead of 3 maps
>> (config, key and indirection table) there is one map that
>> combines everything.
>>
>> Added helper stamp. To check that the helper was built with qemu,
>> qemu would check helper symbols that should contain the stamp.
>> It was done similar to qemu modules, but checking was performed
>> by the helper's ELF parsing.
>>
>> Overall, libvirt process should not be aware of the "interface"
>> of eBPF RSS, it will not be aware of eBPF maps/program "type" and
>> their quantity. That's why qemu and the helper should be from
>> the same build and be "synchronized". Technically each qemu may
>> have its own helper. That's why "query-helper-paths" qmp command
>> was added. Qemu should return the path to the helper that suits
>> and libvirt should use "that" helper for "that" emulator.
>>
>> qmp sample:
>> C: { "execute": "query-helper-paths" }
>> S: { "return": [
>>  {
>>"name": "qemu-ebpf-rss-helper",
>>"path": "/usr/local/libexec/qemu-ebpf-rss-helper"
>>  }
>> ]
>>}
>>
>> Changes since v1:
>> * Mmap() used instead of bpf_map_update_elem().
>> * Added helper stamp.
>>
>> Andrew Melnychenko (5):
>>   ebpf: Added eBPF initialization by fds and map update.
>>   virtio-net: Added property to load eBPF RSS with fds.
>>   qmp: Added the helper stamp check.
>>   ebpf_rss_helper: Added helper for eBPF RSS.
>>   qmp: Added qemu-ebpf-rss-path command.
>>
>>  ebpf/ebpf_rss-stub.c  |   6 +
>>  ebpf/ebpf_rss.c   | 120 ---
>>  ebpf/ebpf_rss.h   |   8 +-
>>  ebpf/qemu-ebpf-rss-helper.c   | 130 +++
>>  ebpf/rss.bpf.skeleton.h   | 557 +++---
>>  hw/net/virtio-net.c   |  77 -
>>  include/hw/virtio/virtio-net.h|   1 +
>>  meson.build   |  47 ++-
>>  monitor/meson.build   |   1 +
>>  monitor/qemu-helper-stamp-utils.c | 297 
>>  monitor/qemu-helper-stamp-utils.h |  24 ++
>>  monitor/qmp-cmds.c|  32 ++
>>  qapi/misc.json|  33 ++
>>  tools/ebpf/rss.bpf.c  |  67 ++--
>>  14 files changed, 990 insertions(+), 410 deletions(-)
>>  create mode 100644 ebpf/qemu-ebpf-rss-helper.c
>>  create mode 100644 monitor/qemu-helper-stamp-utils.c
>>  create mode 100644 monitor/qemu-helper-stamp-utils.h
>>
>> --
>> 2.31.1
>>

Re: [RFC PATCH 0/5] ebpf: Added ebpf helper for libvirtd.

2021-06-28 Thread Yuri Benditovich

On Wed, Jun 23, 2021 at 3:47 AM Jason Wang  wrote:
>
>
> 在 2021/6/22 下午5:09, Toke Høiland-Jørgensen 写道:
> > Daniel P. Berrangé  writes:
> >
> >> On Tue, Jun 22, 2021 at 10:25:19AM +0200, Toke Høiland-Jørgensen wrote:
> >>> Jason Wang  writes:
> >>>
> >>>> 在 2021/6/22 上午11:29, Yuri Benditovich 写道:
> >>>>> On Mon, Jun 21, 2021 at 12:20 PM Jason Wang  wrote:
> >>>>>> 在 2021/6/19 上午4:03, Andrew Melnichenko 写道:
> >>>>>>> Hi Jason,
> >>>>>>> I've checked "kernel.unprivileged_bpf_disabled=0" on Fedora,  Ubuntu,
> >>>>>>> and Debian - no need permissions to update BPF maps.
> >>>>>> How about RHEL :) ?
> >>>>> If I'm not mistaken, the RHEL releases do not use modern kernels yet
> >>>>> (for BPF we need 5.8+).
> >>>>> So this will be (probably) relevant for RHEL 9. Please correct me if 
> >>>>> I'm wrong.
> >>>> Adding Toke for more ideas on this.
> >>> Ignore the kernel version number; we backport all of BPF to RHEL,
> >>> basically. RHEL8.4 is up to upstream kernel 5.10, feature-wise.
> >>>
> >>> However, we completely disable unprivileged BPF on RHEL kernels. Also,
> >>> there's upstream commit:
> >>> 08389d888287 ("bpf: Add kconfig knob for disabling unpriv bpf by default")
> >>>
> >>> which adds a new value of '2' to the unprivileged_bpf_disable sysctl. I
> >>> believe this may end up being the default on Fedora as well.
> >>>
> >>> So any design relying on unprivileged BPF is likely to break; I'd
> >>> suggest you look into how you can get this to work with CAP_BPF :)
> >> QEMU will never have any capabilities. Any resources that required
> >> privileges have to be opened by a separate privileged helper, and the
> >> open FD then passed across to the QEMU process. This relies on the
> >> capabilities checks only being performed at time of initial opening,
> >> and *not* on operations performed on the already open FD.
> > That won't work for regular map updates either, unfortunately: you still
> > have to perform a bpf() syscall to update an element, and that is a
> > privileged operation.
> >
> > You may be able to get around this by using an array map type and
> > mmap()'ing the map contents, but I'm not sure how well that will work
> > across process boundaries.
> >
> > If it doesn't, I only see two possibilities: populate the map
> > ahead-of-time and leave it in place, or keep the privileged helper
> > process around to perform map updates on behalf of QEMU...
>
>
> Right, and this could be probably done by extending and tracking the RSS
> update via rx filter event.

Jason,
Can you please go into a little more detail - what do you mean by
'extending and tracking the RSS update via rx filter event'?

Thanks,
Yuri

>
> Thanks
>
>
> >
> > -Toke
> >
>

Re: [RFC PATCH 0/5] ebpf: Added ebpf helper for libvirtd.

2021-06-21 Thread Yuri Benditovich

On Mon, Jun 21, 2021 at 12:20 PM Jason Wang  wrote:
>
>
> 在 2021/6/19 上午4:03, Andrew Melnichenko 写道:
> > Hi Jason,
> > I've checked "kernel.unprivileged_bpf_disabled=0" on Fedora,  Ubuntu,
> > and Debian - no need permissions to update BPF maps.
>
>
> How about RHEL :) ?

If I'm not mistaken, the RHEL releases do not use modern kernels yet
(for BPF we need 5.8+).
So this will be (probably) relevant for RHEL 9. Please correct me if I'm wrong.


>
> Thanks
>
>
> >
> > On Wed, Jun 16, 2021 at 1:18 AM Andrew Melnichenko  > <mailto:and...@daynix.com>> wrote:
> >
> > Hi,
> >
> > I may miss something.
> >
> > But RSS requires to update the map. This won't work if you
> > don't grant
> > any permission to qemu.
> >
> > Thanks
> >
> >
> > Partly - with "kernel.unprivileged_bpf_disabled=0" capabilities is
> > not required to update maps.
> > With "kernel.unprivileged_bpf_disabled=1" - setting maps will
> > fail(without CAP_BPF) and "in-qemu" RSS will be used.
> >
> > On Tue, Jun 15, 2021 at 12:13 PM Jason Wang  > <mailto:jasow...@redhat.com>> wrote:
> >
> >
> > 在 2021/6/12 上午12:49, Andrew Melnichenko 写道:
> > > Hi,
> > >
> > > So I think the series is for unprivileged_bpf disabled.
> > If I'm not
> > > wrong, I guess the policy is to grant CAP_BPF but do
> > fine grain
> > > checks
> > > via LSM.
> > >
> > >
> > > The main idea is to run eBPF RSS with qemu without any
> > permission.
> > > Libvirt should handle everything and pass proper eBPF file
> > descriptors.
> > > For current eBPF RSS, CAP_SYS_ADMIN(bypass some limitations)
> > > also required, and in the future may be other permissions.
> >
> >
> > I may miss something.
> >
> > But RSS requires to update the map. This won't work if you
> > don't grant
> > any permission to qemu.
> >
> > Thanks
> >
> >
> > >
> > > I'm not sure this is the best. We have several examples
> > that let
> > > libvirt
> > > to involve. Examples:
> > >
> > > 1) create TAP device (and the TUN_SETIFF)
> > >
> > > 2) open vhost devices
> > >
> > >
> > > Technically TAP/vhost not related to a particular qemu
> > emulator. So common
> > > TAP creation should fit any modern qemu. eBPF fds(program
> > and maps) should
> > > suit the interface for current qemu, g.e. some qemu builds
> > may have
> > > different map
> > > structures or their count. It's necessary that the qemu got fds
> > > prepared by the helper
> > > that was built with the qemu.
> > >
> > > I think we need an example on the detail steps for how
> > libvirt is
> > > expected to use this.
> > >
> > >
> > > The simplified workflow looks like this:
> > >
> > >  1. Libvirt got "emulator" from domain document.
> > >  2. Libvirt queries for qemu capabilities.
> > >  3. One of the capabilities is "qemu-ebpf-rss-helper"
> > path(if present).
> > >  4. On NIC preparation Libvirt checks for virtio-net + rss
> > configurations.
> > >  5. If required, the "qemu-ebpf-rss-helper" called and fds are
> > > received through unix fd.
> > >  6. Those fds are for eBPF RSS, which passed to child
> > process - qemu.
> > >  7. Qemu launched with virtio-net-pci property "rss" and
> > "ebpf_rss_fds".
> > >
> > >
> > > On Fri, Jun 11, 2021 at 8:36 AM Jason Wang
> > mailto:jasow...@redhat.com>
> > > <mailto:jasow...@redhat.com <mailto:jasow...@redhat.com>>>
> > wrote:
> > >
> > >
> > > 在 2021/6/10 下午2:55, Yuri Benditovich 写道:
> > >

Re: [RFC PATCH 0/5] ebpf: Added ebpf helper for libvirtd.

2021-06-10 Thread Yuri Benditovich

On Thu, Jun 10, 2021 at 9:41 AM Jason Wang  wrote:
>
>
> 在 2021/6/9 下午6:04, Andrew Melnychenko 写道:
> > Libvirt usually launches qemu with strict permissions.
> > To enable eBPF RSS steering, qemu-ebpf-rss-helper was added.
>
>
> A silly question:
>
> Kernel had the following permission checks in bpf syscall:
>
> if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
>  return -EPERM;
> ...
>
>  err = security_bpf(cmd, &attr, size);
>  if (err < 0)
>  return err;
>
> So if I understand the code correctly, bpf syscall can only be done if:
>
> 1) unprivileged_bpf is enabled or
> 2) has the capability  and pass the LSM checks
>
> So I think the series is for unprivileged_bpf disabled. If I'm not
> wrong, I guess the policy is to grant CAP_BPF but do fine grain checks
> via LSM.
>
> If this is correct, need to describe it in the commit log.
>
>
> >
> > Added property "ebpf_rss_fds" for "virtio-net" that allows to
> > initialize eBPF RSS context with passed program & maps fds.
> >
> > Added qemu-ebpf-rss-helper - simple helper that loads eBPF
> > context and passes fds through unix socket.
> > Libvirt should call the helper and pass fds to qemu through
> > "ebpf_rss_fds" property.
> >
> > Added explicit target OS check for libbpf dependency in meson.
> > eBPF RSS works only with Linux TAP, so there is no reason to
> > build eBPF loader/helper for non-Linux.
> >
> > Overall, libvirt process should not be aware of the "interface"
> > of eBPF RSS, it will not be aware of eBPF maps/program "type" and
> > their quantity.
>
>
> I'm not sure this is the best. We have several examples that let libvirt
> to involve. Examples:
>
> 1) create TAP device (and the TUN_SETIFF)
>
> 2) open vhost devices
>
>
> >   That's why qemu and the helper should be from
> > the same build and be "synchronized". Technically each qemu may
> > have its own helper. That's why "query-helper-paths" qmp command
> > was added. Qemu should return the path to the helper that suits
> > and libvirt should use "that" helper for "that" emulator.
> >
> > qmp sample:
> > C: { "execute": "query-helper-paths" }
> > S: { "return": [
> >   {
> > "name": "qemu-ebpf-rss-helper",
> > "path": "/usr/local/libexec/qemu-ebpf-rss-helper"
> >   }
> >  ]
> > }
>
>
> I think we need an example on the detail steps for how libvirt is
> expected to use this.

The preliminary patches for libvirt are at
https://github.com/daynix/libvirt/tree/RSSv1

>
> Thanks
>
>
> >
> > Andrew Melnychenko (5):
> >ebpf: Added eBPF initialization by fds.
> >virtio-net: Added property to load eBPF RSS with fds.
> >ebpf_rss_helper: Added helper for eBPF RSS.
> >qmp: Added qemu-ebpf-rss-path command.
> >meson: libbpf dependency now exclusively for Linux.
> >
> >   ebpf/ebpf_rss-stub.c   |   6 ++
> >   ebpf/ebpf_rss.c|  31 +++-
> >   ebpf/ebpf_rss.h|   5 ++
> >   ebpf/qemu-ebpf-rss-helper.c| 130 +
> >   hw/net/virtio-net.c|  77 ++-
> >   include/hw/virtio/virtio-net.h |   1 +
> >   meson.build|  37 ++
> >   monitor/qmp-cmds.c |  78 
> >   qapi/misc.json |  29 
> >   9 files changed, 374 insertions(+), 20 deletions(-)
> >   create mode 100644 ebpf/qemu-ebpf-rss-helper.c
> >
>

Re: [PATCH v5 0/7] eBPF RSS support for virtio-net

2021-05-17 Thread Yuri Benditovich

On Fri, May 14, 2021 at 4:43 PM Michael S. Tsirkin  wrote:
>
> On Thu, Mar 25, 2021 at 05:35:22PM +0200, Andrew Melnychenko wrote:
> > This set of patches introduces the usage of eBPF for packet steering
> > and RSS hash calculation:
> > * RSS(Receive Side Scaling) is used to distribute network packets to
> > guest virtqueues by calculating packet hash
> > * Additionally adding support for the usage of RSS with vhost
> >
> > The eBPF works on kernels 5.8+
> > On earlier kernels it fails to load and the RSS feature is reported
> > only without vhost and implemented in 'in-qemu' software.
> >
> > Implementation notes:
> > Linux TAP TUNSETSTEERINGEBPF ioctl was used to set the eBPF program.
> > Added libbpf dependency and eBPF support.
> > The eBPF program is part of the qemu and presented as an array
> > of BPF ELF file data. The eBPF array file initially generated by bpftool.
> > The compilation of eBPF is not part of QEMU build and can be done
> > using provided Makefile.ebpf.
> > Added changes to virtio-net and vhost, primary eBPF RSS is used.
> > 'in-qemu' RSS used in the case of hash population and as a fallback option.
> > For vhost, the hash population feature is not reported to the guest.
> >
> > Please also see the documentation in PATCH 6/7.
>
> Reviewed-by: Michael S. Tsirkin 
>
> > Known issues:
> > * hash population not supported by eBPF RSS: 'in-qemu' RSS used
> > as a fallback, also, hash population feature is not reported to guests
> > with vhost.
>
> Could we instead fail init when RSS is requested and vhost is
> enabled? we can't do it for on by default features but we can
> for off by default ones ...
>
Of course this is possible.
I hope we do not need to stop the merge (it is in progress) and this
can be done in a separate patch and after some discussion.
Notes for the discussion:
1. We are not talking about RSS (it no longer conflicts with vhost);
this is about "hash report".
2. Linux guest does not acknowledge this feature and for Linux VM
there is no motivation to enable it at all. So it looks like the issue
is minor, if any.
3. Currently we clear this feature with vhost but there is nothing
specific to the "hash report" feature; we clear it during a check of
vhost features (as well as other features dependent on vhost). If/when
this feature will be supported by the kernel - we'll not disable it
automatically. You suggest to fail the init for "hash + vhost"
explicitly without any special reason.
4. In general I think failing init is not the best behavior of qemu,
it is typically used in case of a really significant problem. Absence
of this feature is not something that leads to unexpected behavior or
significant performance loss.  Maybe a warning is enough?




> > * IPv6 extensions still in progress.
> >
> > Changes since v1:
> > * using libbpf instead of direct 'bpf' system call.
> > * added libbpf dependency to the configure/meson scripts.
> > * changed python script for eBPF .h file generation.
> > * changed eBPF program - reading L3 proto from ethernet frame.
> > * added TUNSETSTEERINGEBPF define for TUN.
> > * changed the maintainer's info.
> > * added license headers.
> > * refactored code.
> >
> > Changes since v2:
> > * using bpftool for eBPF skeleton generation.
> > * ebpf_rss is refactored to use skeleton generated by bpftool.
> > * added/adjusted license in comment sections and in eBPF file.
> > * rss.bpf.c and Makefile.ebpf moved to the tool/ebpf folder.
> > * virtio-net eBPF rss refactored. Now eBPF initialized during realize().
> >
> > Changes since v3:
> > * rebased to last master.
> > * fixed issue with failed build without libbpf.
> > * fixed ebpf loading without rss option.
> > * refactored labels in ebpf_rss.c
> >
> > Changes since v4:
> > * refactored configure/meson script.
> > * added checks for load_bytes in ebpf.
> > * documentation added to the index.
> > * refactored Makefile and rss.bpf.c.
> > * rebased to last master.
> >
> > Andrew (7):
> >   net/tap: Added TUNSETSTEERINGEBPF code.
> >   net: Added SetSteeringEBPF method for NetClientState.
> >   ebpf: Added eBPF RSS program.
> >   ebpf: Added eBPF RSS loader.
> >   virtio-net: Added eBPF RSS to virtio-net.
> >   docs: Added eBPF documentation.
> >   MAINTAINERS: Added eBPF maintainers information.
> >
> >  MAINTAINERS|   8 +
> >  configure  |   8 +-
> >  docs/devel/ebpf_rss.rst| 125 
> >  docs/devel/index.rst   |   1 +
> >  ebpf/ebpf_rss-stub.c   |  40 +++
> >  ebpf/ebpf_rss.c| 165 ++
> >  ebpf/ebpf_rss.h|  44 +++
> >  ebpf/meson.build   |   1 +
> >  ebpf/rss.bpf.skeleton.h| 423 +
> >  ebpf/trace-events  |   4 +
> >  ebpf/trace.h   |   2 +
> >  hw/net/vhost_net.c |   3 +
> >  hw/net/virtio-net.c| 115 ++-
> >  include/hw/virtio/virtio-net.h |   4 +
> >  include/net/net.h  |   2

Re: [PATCH v5 0/7] eBPF RSS support for virtio-net

2021-04-01 Thread Yuri Benditovich

More correctly, https://bugzilla.redhat.com/show_bug.cgi?id=1865786

On Fri, Apr 2, 2021 at 8:21 AM Yuri Benditovich
 wrote:
>
> Hi Jason,
>
> Yes, the work to support RSS in the Linux virtio-net driver is in progress.
> https://bugzilla.redhat.com/show_bug.cgi?id=1912082
>
> On Fri, Apr 2, 2021 at 5:57 AM Jason Wang  wrote:
> >
> >
> > 在 2021/3/25 下午11:35, Andrew Melnychenko 写道:
> > > This set of patches introduces the usage of eBPF for packet steering
> > > and RSS hash calculation:
> > > * RSS(Receive Side Scaling) is used to distribute network packets to
> > > guest virtqueues by calculating packet hash
> > > * Additionally adding support for the usage of RSS with vhost
> > >
> > > The eBPF works on kernels 5.8+
> > > > On earlier kernels it fails to load and the RSS feature is reported
> > > only without vhost and implemented in 'in-qemu' software.
> > >
> > > Implementation notes:
> > > Linux TAP TUNSETSTEERINGEBPF ioctl was used to set the eBPF program.
> > > Added libbpf dependency and eBPF support.
> > > The eBPF program is part of the qemu and presented as an array
> > > of BPF ELF file data. The eBPF array file initially generated by bpftool.
> > > The compilation of eBPF is not part of QEMU build and can be done
> > > using provided Makefile.ebpf.
> > > Added changes to virtio-net and vhost, primary eBPF RSS is used.
> > > 'in-qemu' RSS used in the case of hash population and as a fallback 
> > > option.
> > > For vhost, the hash population feature is not reported to the guest.
> > >
> > > Please also see the documentation in PATCH 6/7.
> > >
> > > Known issues:
> > > * hash population not supported by eBPF RSS: 'in-qemu' RSS used
> > > as a fallback, also, hash population feature is not reported to guests
> > > with vhost.
> > > * IPv6 extensions still in progress.
> >
> >
> > Hi Andrew:
> >
> > The patch looks good at a glance. I tend to queue it for 6.1.
> >
> > One issue is that, there's no easy way for testing it without a windows
> > guest.
> >
> > Do you have plan to extend Linux driver to support RSS (e.g via ethtool?).
> >
> > Thanks
> >
> >
> > >
> > > Changes since v1:
> > > * using libbpf instead of direct 'bpf' system call.
> > > * added libbpf dependency to the configure/meson scripts.
> > > * changed python script for eBPF .h file generation.
> > > * changed eBPF program - reading L3 proto from ethernet frame.
> > > * added TUNSETSTEERINGEBPF define for TUN.
> > > * changed the maintainer's info.
> > > * added license headers.
> > > * refactored code.
> > >
> > > Changes since v2:
> > > * using bpftool for eBPF skeleton generation.
> > > * ebpf_rss is refactored to use skeleton generated by bpftool.
> > > > * added/adjusted license in comment sections and in eBPF file.
> > > * rss.bpf.c and Makefile.ebpf moved to the tool/ebpf folder.
> > > * virtio-net eBPF rss refactored. Now eBPF initialized during realize().
> > >
> > > Changes since v3:
> > > * rebased to last master.
> > > * fixed issue with failed build without libbpf.
> > > * fixed ebpf loading without rss option.
> > > * refactored labels in ebpf_rss.c
> > >
> > > Changes since v4:
> > > * refactored configure/meson script.
> > > * added checks for load_bytes in ebpf.
> > > * documentation added to the index.
> > > * refactored Makefile and rss.bpf.c.
> > > * rebased to last master.
> > >
> > > Andrew (7):
> > >net/tap: Added TUNSETSTEERINGEBPF code.
> > >net: Added SetSteeringEBPF method for NetClientState.
> > >ebpf: Added eBPF RSS program.
> > >ebpf: Added eBPF RSS loader.
> > >virtio-net: Added eBPF RSS to virtio-net.
> > >docs: Added eBPF documentation.
> > >MAINTAINERS: Added eBPF maintainers information.
> > >
> > >   MAINTAINERS|   8 +
> > >   configure  |   8 +-
> > >   docs/devel/ebpf_rss.rst| 125 
> > >   docs/devel/index.rst   |   1 +
> > >   ebpf/ebpf_rss-stub.c   |  40 +++
> > >   ebpf/ebpf_rss.c| 165 ++
> > >   ebpf/ebpf_rss.h|  44 +++
> > >   ebpf/meson.build   |   1 +
> > >   ebpf/rss.bpf.skeleton.h| 423 +
> >

Re: [PATCH v5 0/7] eBPF RSS support for virtio-net

2021-04-01 Thread Yuri Benditovich

Hi Jason,

Yes, the work to support RSS in the Linux virtio-net driver is in progress.
https://bugzilla.redhat.com/show_bug.cgi?id=1912082

On Fri, Apr 2, 2021 at 5:57 AM Jason Wang  wrote:
>
>
> 在 2021/3/25 下午11:35, Andrew Melnychenko 写道:
> > This set of patches introduces the usage of eBPF for packet steering
> > and RSS hash calculation:
> > * RSS(Receive Side Scaling) is used to distribute network packets to
> > guest virtqueues by calculating packet hash
> > * Additionally adding support for the usage of RSS with vhost
> >
> > The eBPF works on kernels 5.8+
> > On earlier kernels it fails to load and the RSS feature is reported
> > only without vhost and implemented in 'in-qemu' software.
> >
> > Implementation notes:
> > Linux TAP TUNSETSTEERINGEBPF ioctl was used to set the eBPF program.
> > Added libbpf dependency and eBPF support.
> > The eBPF program is part of the qemu and presented as an array
> > of BPF ELF file data. The eBPF array file initially generated by bpftool.
> > The compilation of eBPF is not part of QEMU build and can be done
> > using provided Makefile.ebpf.
> > Added changes to virtio-net and vhost, primary eBPF RSS is used.
> > 'in-qemu' RSS used in the case of hash population and as a fallback option.
> > For vhost, the hash population feature is not reported to the guest.
> >
> > Please also see the documentation in PATCH 6/7.
> >
> > Known issues:
> > * hash population not supported by eBPF RSS: 'in-qemu' RSS used
> > as a fallback, also, hash population feature is not reported to guests
> > with vhost.
> > * IPv6 extensions still in progress.
>
>
> Hi Andrew:
>
> The patch looks good at a glance. I tend to queue it for 6.1.
>
> One issue is that, there's no easy way for testing it without a windows
> guest.
>
> Do you have plan to extend Linux driver to support RSS (e.g via ethtool?).
>
> Thanks
>
>
> >
> > Changes since v1:
> > * using libbpf instead of direct 'bpf' system call.
> > * added libbpf dependency to the configure/meson scripts.
> > * changed python script for eBPF .h file generation.
> > * changed eBPF program - reading L3 proto from ethernet frame.
> > * added TUNSETSTEERINGEBPF define for TUN.
> > * changed the maintainer's info.
> > * added license headers.
> > * refactored code.
> >
> > Changes since v2:
> > * using bpftool for eBPF skeleton generation.
> > * ebpf_rss is refactored to use skeleton generated by bpftool.
> > * added/adjusted license in comment sections and in eBPF file.
> > * rss.bpf.c and Makefile.ebpf moved to the tool/ebpf folder.
> > * virtio-net eBPF rss refactored. Now eBPF initialized during realize().
> >
> > Changes since v3:
> > * rebased to last master.
> > * fixed issue with failed build without libbpf.
> > * fixed ebpf loading without rss option.
> > * refactored labels in ebpf_rss.c
> >
> > Changes since v4:
> > * refactored configure/meson script.
> > * added checks for load_bytes in ebpf.
> > * documentation added to the index.
> > * refactored Makefile and rss.bpf.c.
> > * rebased to last master.
> >
> > Andrew (7):
> >net/tap: Added TUNSETSTEERINGEBPF code.
> >net: Added SetSteeringEBPF method for NetClientState.
> >ebpf: Added eBPF RSS program.
> >ebpf: Added eBPF RSS loader.
> >virtio-net: Added eBPF RSS to virtio-net.
> >docs: Added eBPF documentation.
> >MAINTAINERS: Added eBPF maintainers information.
> >
> >   MAINTAINERS|   8 +
> >   configure  |   8 +-
> >   docs/devel/ebpf_rss.rst| 125 
> >   docs/devel/index.rst   |   1 +
> >   ebpf/ebpf_rss-stub.c   |  40 +++
> >   ebpf/ebpf_rss.c| 165 ++
> >   ebpf/ebpf_rss.h|  44 +++
> >   ebpf/meson.build   |   1 +
> >   ebpf/rss.bpf.skeleton.h| 423 +
> >   ebpf/trace-events  |   4 +
> >   ebpf/trace.h   |   2 +
> >   hw/net/vhost_net.c |   3 +
> >   hw/net/virtio-net.c| 115 ++-
> >   include/hw/virtio/virtio-net.h |   4 +
> >   include/net/net.h  |   2 +
> >   meson.build|   9 +
> >   meson_options.txt  |   2 +
> >   net/tap-bsd.c  |   5 +
> >   net/tap-linux.c|  13 +
> >   net/tap-linux.h|   1 +
> >   net/tap-solaris.c  |   5 +
> >   net/tap-stub.c |   5 +
> >   net/tap.c  |   9 +
> >   net/tap_int.h  |   1 +
> >   net/vhost-vdpa.c   |   2 +
> >   tools/ebpf/Makefile.ebpf   |  22 ++
> >   tools/ebpf/rss.bpf.c   | 552 +
> >   27 files changed, 1567 insertions(+), 4 deletions(-)
> >   create mode 100644 docs/devel/ebpf_rss.rst
> >   create mode 100644 ebpf/ebpf_rss-stub.c
> >   create mode 100644 ebpf/ebpf_rss.c
> >   create mode 100644 ebpf/ebpf_rss.h
> >   create mode 100644 ebpf/meson.build
> >   create mode 100644

Re: [RFC PATCH v2 0/3] virtio-net: graceful drop of vhost for TAP

2021-03-26 Thread Yuri Benditovich

On Fri, Mar 26, 2021 at 10:51 AM Jason Wang  wrote:
>
>
> 在 2021/3/25 下午5:00, Yuri Benditovich 写道:
> > Hi Jason,
> >
> > This was discussed earlier on the previous series of patches.
> > https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg01829.html
> > There were strong objections from both Daniel and Michael and I feel
> > that the series was rejected.
> > There was Michael's claim:
> > "We did what this patch is trying to change for years now, in
> > particular KVM also seems to happily disable CPU features not supported
> > by kernel so I wonder why we can't keep doing it, with tweaks for some
> > corner cases."
>
>
> So for cpu features, it works since the management have other tool to
> the cpuid. Then management will make sure the migration happens among
> the hosts that are compatible with the same cpuid sets.
>
> For vhost, we don't have such capabilities, that's why I think we need
> to have fallback.
>
Hi Jason,
What, from your POV was the result of v1 discussion?
IMO, there was one critical comment that the patch does not address
'forcevhost' properly (indeed).
IMO, there are many comments from Daniel and Michael that in the sum
say that this change is not what they would like.
If I'm mistaken please let me know.

I have no problem to send v3 = v1 + handling of 'forcevhost'
If this is what you want, please let me know also.

>
> > https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg03187.html
> > And it was Michael's question:
> > "Can we limit the change to when a VM is migrated in?"
> > https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg03163.html
> > So I'm trying to suggest another approach:
> > - In case of conflicting features (for example RSS and vhost) we in
> > qemu we do not have enough information to prefer one or another.
> > - If we drop to userspace in the first set_features we say: "vhost is
> > less important than other requested features"
> > - This series keeps backward compatibility, i.e. if you start with
> > vhost and some features are not available - they are silently cleared.
> > - But in case the features are available on source machine - they are used
> > - In case of migration this series says: "We prefer successful
> > migration even if for that we need to drop to userspace"
> > - On the migration back to the 1st system we again work with all the
> > features and with vhost as all the features are available.
>
>
> One issue for this approach is that. Consider we had two drivers:
>
> 1) Driver A that supports split only
> 2) Driver B that supports packed
>
> Consider src support packed but dest doesn't
>
> So switching driver A to driver B works without migration. But if we
> switch driver from A to B after migration it won't work?

I assume that  both src and dest started with vhost=on.

As driver B supports both packed and split, you can switch from driver
A to driver B after migration
and driver B will work with split. Exactly as it does today.

The key question is what is more important - vhost or features that
vhost does not support?
current code says: vhost is more important always
v1 patch says: features are more important always.
v2 patch says: vhost is more important at init time, features are more
important at migration time.
Because we are able to drop vhost but we can't drop features when we
have a running driver.
Do you agree?

>
> Thanks
>
>
> >
> > Thanks,
> > Yuri
> >
> >
> >
> > On Thu, Mar 25, 2021 at 8:59 AM Jason Wang  wrote:
> >>
> >> 在 2021/3/22 下午8:24, Yuri Benditovich 写道:
> >>> Allow fallback to userspace only upon migration, only for specific 
> >>> features
> >>> and only if 'vhostforce' is not requested.
> >>>
> >>> Changes from v1:
> >>> Patch 1 dropped (will be submitted in another series)
> >>> Added device callback in case the migration should fail due to missing 
> >>> features
> >>
> >> Hi Yuri:
> >>
> >> Have a quick glance at the series. A question is why we need to do the
> >> fallback only during load?
> >>
> >> I think we should do it in the device initialization. E.g when the vhost
> >> features can not satisfy, we should disable vhost since there.
> >>
> >> Thanks
> >>
> >>
> >>> Yuri Benditovich (3):
> >>> net: add ability to hide (disable) vhost_net
> >>> virtio: introduce 'missing_features_migrated' device callback
> >>> virtio-net: implement missing_features_migrated callback
> >>>
> >>>hw/net/vhost_net.c |  4 ++-
> >>>hw/net/virtio-net.c| 51 ++
> >>>hw/virtio/virtio.c |  8 ++
> >>>include/hw/virtio/virtio.h |  8 ++
> >>>include/net/net.h  |  1 +
> >>>5 files changed, 71 insertions(+), 1 deletion(-)
> >>>
>

Re: [RFC PATCH v2 0/3] virtio-net: graceful drop of vhost for TAP

2021-03-25 Thread Yuri Benditovich

Hi Jason,

This was discussed earlier on the previous series of patches.
https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg01829.html
There were strong objections from both Daniel and Michael and I feel
that the series was rejected.
There was Michael's claim:
"We did what this patch is trying to change for years now, in
particular KVM also seems to happily disable CPU features not supported
by kernel so I wonder why we can't keep doing it, with tweaks for some
corner cases."
https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg03187.html
And it was Michael's question:
"Can we limit the change to when a VM is migrated in?"
https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg03163.html
So I'm trying to suggest another approach:
- In case of conflicting features (for example RSS and vhost) we in
qemu we do not have enough information to prefer one or another.
- If we drop to userspace in the first set_features we say: "vhost is
less important than other requested features"
- This series keeps backward compatibility, i.e. if you start with
vhost and some features are not available - they are silently cleared.
- But in case the features are available on source machine - they are used
- In case of migration this series says: "We prefer successful
migration even if for that we need to drop to userspace"
- On the migration back to the 1st system we again work with all the
features and with vhost as all the features are available.

Thanks,
Yuri



On Thu, Mar 25, 2021 at 8:59 AM Jason Wang  wrote:
>
>
> 在 2021/3/22 下午8:24, Yuri Benditovich 写道:
> > Allow fallback to userspace only upon migration, only for specific features
> > and only if 'vhostforce' is not requested.
> >
> > Changes from v1:
> > Patch 1 dropped (will be submitted in another series)
> > Added device callback in case the migration should fail due to missing 
> > features
>
>
> Hi Yuri:
>
> Have a quick glance at the series. A question is why we need to do the
> fallback only during load?
>
> I think we should do it in the device initialization. E.g when the vhost
> features can not satisfy, we should disable vhost since there.
>
> Thanks
>
>
> >
> > Yuri Benditovich (3):
> >net: add ability to hide (disable) vhost_net
> >virtio: introduce 'missing_features_migrated' device callback
> >virtio-net: implement missing_features_migrated callback
> >
> >   hw/net/vhost_net.c |  4 ++-
> >   hw/net/virtio-net.c| 51 ++
> >   hw/virtio/virtio.c |  8 ++
> >   include/hw/virtio/virtio.h |  8 ++
> >   include/net/net.h  |  1 +
> >   5 files changed, 71 insertions(+), 1 deletion(-)
> >
>

Re: [PATCH v2 1/2] virtio-pci: add check for vdev in virtio_pci_isr_read

2021-03-23 Thread Yuri Benditovich

Ping


On Mon, Mar 15, 2021 at 1:59 PM Yuri Benditovich
 wrote:
>
> https://bugzilla.redhat.com/show_bug.cgi?id=1743098
> This commit completes the solution of segfault in hot unplug flow
> (by commit ccec7e9603f446fe75c6c563ba335c00cfda6a06).
> Added missing check for vdev in virtio_pci_isr_read.
> Typical stack of crash:
> virtio_pci_isr_read ../hw/virtio/virtio-pci.c:1365 with proxy-vdev = 0
> memory_region_read_accessor at ../softmmu/memory.c:442
> access_with_adjusted_size at ../softmmu/memory.c:552
> memory_region_dispatch_read1 at ../softmmu/memory.c:1420
> memory_region_dispatch_read  at ../softmmu/memory.c:1449
> flatview_read_continue at ../softmmu/physmem.c:2822
> flatview_read at ../softmmu/physmem.c:2862
> address_space_read_full at ../softmmu/physmem.c:2875
>
> Signed-off-by: Yuri Benditovich 
> ---
>  hw/virtio/virtio-pci.c | 9 +++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index 883045a223..4a3dcee771 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -1364,9 +1364,14 @@ static uint64_t virtio_pci_isr_read(void *opaque, 
> hwaddr addr,
>  {
>  VirtIOPCIProxy *proxy = opaque;
>  VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
> -uint64_t val = qatomic_xchg(&vdev->isr, 0);
> -pci_irq_deassert(&proxy->pci_dev);
> +uint64_t val;
> +
> +if (vdev == NULL) {
> +return 0;
> +}
>
> +val = qatomic_xchg(&vdev->isr, 0);
> +pci_irq_deassert(&proxy->pci_dev);
>  return val;
>  }
>
> --
> 2.17.1
>

[RFC PATCH v2 3/3] virtio-net: implement missing_features_migrated callback

2021-03-22 Thread Yuri Benditovich

Graceful drop to userspace virtio in case selected features
are missing on the destination system. Currently used for
3 features that might be supported by the vhost kernel on
the source machine and not supported on the destination machine:
rss, hash reporting, packed ring.

Signed-off-by: Yuri Benditovich 
---
 hw/net/virtio-net.c | 51 +
 1 file changed, 51 insertions(+)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 96a3cc8357..97afca34e7 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -527,6 +527,15 @@ static RxFilterInfo 
*virtio_net_query_rxfilter(NetClientState *nc)
 return info;
 }
 
+static void virtio_net_allow_vhost(VirtIONet *n, bool allow)
+{
+int i;
+for (i = 0; i < n->max_queues; i++) {
+NetClientState *nc = qemu_get_subqueue(n->nic, i)->peer;
+nc->vhost_net_disabled = !allow;
+}
+}
+
 static void virtio_net_reset(VirtIODevice *vdev)
 {
 VirtIONet *n = VIRTIO_NET(vdev);
@@ -564,6 +573,7 @@ static void virtio_net_reset(VirtIODevice *vdev)
 assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
 }
 }
+virtio_net_allow_vhost(n, true);
 }
 
 static void peer_test_vnet_hdr(VirtIONet *n)
@@ -701,6 +711,27 @@ static void virtio_net_set_queues(VirtIONet *n)
 }
 }
 
+static bool can_disable_vhost(VirtIONet *n)
+{
+NetClientState *peer = qemu_get_queue(n->nic)->peer;
+NetdevInfo *ndi;
+if (!get_vhost_net(peer)) {
+return false;
+}
+if (!peer) {
+return true;
+}
+if (peer->info->type != NET_CLIENT_DRIVER_TAP) {
+return false;
+}
+ndi = peer->stored_config;
+if (ndi && ndi->u.tap.has_vhostforce && ndi->u.tap.vhostforce) {
+printf("vhost forced, can't drop it\n");
+return false;
+}
+return true;
+}
+
 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
 
 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
@@ -3433,6 +3464,25 @@ static bool dev_unplug_pending(void *opaque)
 return vdc->primary_unplug_pending(dev);
 }
 
+static bool virtio_net_missing_features_migrated(VirtIODevice *vdev,
+ uint64_t missing)
+{
+VirtIONet *n = VIRTIO_NET(vdev);
+bool disable_vhost = false;
+if (virtio_has_feature(missing, VIRTIO_NET_F_HASH_REPORT) ||
+virtio_has_feature(missing, VIRTIO_NET_F_RSS) ||
+virtio_has_feature(missing, VIRTIO_F_RING_PACKED)) {
+disable_vhost = true;
+}
+disable_vhost = disable_vhost && can_disable_vhost(n);
+if (disable_vhost) {
+warn_report("falling back to userspace virtio due to missing"
+" features %lx", missing);
+virtio_net_allow_vhost(n, false);
+}
+return disable_vhost;
+}
+
 static const VMStateDescription vmstate_virtio_net = {
 .name = "virtio-net",
 .minimum_version_id = VIRTIO_NET_VM_VERSION,
@@ -3527,6 +3577,7 @@ static void virtio_net_class_init(ObjectClass *klass, 
void *data)
 vdc->get_features = virtio_net_get_features;
 vdc->set_features = virtio_net_set_features;
 vdc->bad_features = virtio_net_bad_features;
+vdc->missing_features_migrated = virtio_net_missing_features_migrated;
 vdc->reset = virtio_net_reset;
 vdc->set_status = virtio_net_set_status;
 vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
-- 
2.26.2

[RFC PATCH v2 2/3] virtio: introduce 'missing_features_migrated' device callback

2021-03-22 Thread Yuri Benditovich

This optional callback addresses migration problem in case
some of negotiated features not present on the destination
system. The device has a chance to avoid migration failure.

Signed-off-by: Yuri Benditovich 
---
 hw/virtio/virtio.c | 8 
 include/hw/virtio/virtio.h | 8 
 2 files changed, 16 insertions(+)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 07f4e60b30..36dcac75e5 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -3107,6 +3107,14 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int 
version_id)
 vdev->device_endian = virtio_default_endian();
 }
 
+if (vdc->missing_features_migrated) {
+uint64_t missing = (vdev->guest_features & ~(vdev->host_features));
+if (missing && vdc->missing_features_migrated(vdev, missing)) {
+vdev->host_features =
+vdc->get_features(vdev, vdev->host_features, NULL);
+}
+}
+
 if (virtio_64bit_features_needed(vdev)) {
 /*
  * Subsection load filled vdev->guest_features.  Run them
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index b7ece7a6a8..fbfbec6ef2 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -158,6 +158,14 @@ struct VirtioDeviceClass {
  * processed, e.g. for bounds checking.
  */
 int (*post_load)(VirtIODevice *vdev);
+/* In case when some of negotiated features are missing on the destination
+   system, the migration is expected to fail. To avoid such failure, the
+   device may implement this callback and apply graceful configuration
+   change to extend host features (for example, disable vhost).
+   If the device returns true the virtio reinitializes the host features
+   and further set_features call may succeed.
+ */
+bool (*missing_features_migrated)(VirtIODevice *vdev, uint64_t val);
 const VMStateDescription *vmsd;
 bool (*primary_unplug_pending)(void *opaque);
 };
-- 
2.26.2

[RFC PATCH v2 0/3] virtio-net: graceful drop of vhost for TAP

2021-03-22 Thread Yuri Benditovich

Allow fallback to userspace only upon migration, only for specific features
and only if 'vhostforce' is not requested.

Changes from v1:
Patch 1 dropped (will be submitted in another series)
Added device callback in case the migration should fail due to missing features

Yuri Benditovich (3):
  net: add ability to hide (disable) vhost_net
  virtio: introduce 'missing_features_migrated' device callback
  virtio-net: implement missing_features_migrated callback

 hw/net/vhost_net.c |  4 ++-
 hw/net/virtio-net.c| 51 ++
 hw/virtio/virtio.c |  8 ++
 include/hw/virtio/virtio.h |  8 ++
 include/net/net.h  |  1 +
 5 files changed, 71 insertions(+), 1 deletion(-)

-- 
2.26.2

[RFC PATCH v2 1/3] net: add ability to hide (disable) vhost_net

2021-03-22 Thread Yuri Benditovich

If 'vhost_net_disabled' in the NetClientState of the
net device, get_vhost_net for TAP returns NULL. Network adapters
can use this ability to hide the vhost_net temporary between
resets in case some active features contradict with vhost.

Signed-off-by: Yuri Benditovich 
---
 hw/net/vhost_net.c | 4 +++-
 include/net/net.h  | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 24d555e764..6660efd9ea 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -436,7 +436,9 @@ VHostNetState *get_vhost_net(NetClientState *nc)
 
 switch (nc->info->type) {
 case NET_CLIENT_DRIVER_TAP:
-vhost_net = tap_get_vhost_net(nc);
+if (!nc->vhost_net_disabled) {
+vhost_net = tap_get_vhost_net(nc);
+}
 break;
 #ifdef CONFIG_VHOST_NET_USER
 case NET_CLIENT_DRIVER_VHOST_USER:
diff --git a/include/net/net.h b/include/net/net.h
index a02949f6db..a938211524 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -103,6 +103,7 @@ struct NetClientState {
 int vring_enable;
 int vnet_hdr_len;
 bool is_netdev;
+bool vhost_net_disabled;
 QTAILQ_HEAD(, NetFilterState) filters;
 };
 
-- 
2.26.2

[PATCH v2 1/2] virtio-pci: add check for vdev in virtio_pci_isr_read

2021-03-15 Thread Yuri Benditovich

https://bugzilla.redhat.com/show_bug.cgi?id=1743098
This commit completes the solution of segfault in hot unplug flow
(by commit ccec7e9603f446fe75c6c563ba335c00cfda6a06).
Added missing check for vdev in virtio_pci_isr_read.
Typical stack of crash:
virtio_pci_isr_read ../hw/virtio/virtio-pci.c:1365 with proxy-vdev = 0
memory_region_read_accessor at ../softmmu/memory.c:442
access_with_adjusted_size at ../softmmu/memory.c:552
memory_region_dispatch_read1 at ../softmmu/memory.c:1420
memory_region_dispatch_read  at ../softmmu/memory.c:1449
flatview_read_continue at ../softmmu/physmem.c:2822
flatview_read at ../softmmu/physmem.c:2862
address_space_read_full at ../softmmu/physmem.c:2875

Signed-off-by: Yuri Benditovich 
---
 hw/virtio/virtio-pci.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 883045a223..4a3dcee771 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1364,9 +1364,14 @@ static uint64_t virtio_pci_isr_read(void *opaque, hwaddr 
addr,
 {
 VirtIOPCIProxy *proxy = opaque;
 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
-uint64_t val = qatomic_xchg(&vdev->isr, 0);
-pci_irq_deassert(&proxy->pci_dev);
+uint64_t val;
+
+if (vdev == NULL) {
+return 0;
+}
 
+val = qatomic_xchg(&vdev->isr, 0);
+pci_irq_deassert(&proxy->pci_dev);
 return val;
 }
 
-- 
2.17.1

[PATCH v2 2/2] virtio-pci: remove explicit initialization of val

2021-03-15 Thread Yuri Benditovich

The value is assigned later in this procedure.

Signed-off-by: Yuri Benditovich 
---
 hw/virtio/virtio-pci.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 4a3dcee771..c1b67cf6fc 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1385,10 +1385,10 @@ static uint64_t virtio_pci_device_read(void *opaque, 
hwaddr addr,
 {
 VirtIOPCIProxy *proxy = opaque;
 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
-uint64_t val = 0;
+uint64_t val;
 
 if (vdev == NULL) {
-return val;
+return 0;
 }
 
 switch (size) {
@@ -1401,6 +1401,9 @@ static uint64_t virtio_pci_device_read(void *opaque, 
hwaddr addr,
 case 4:
 val = virtio_config_modern_readl(vdev, addr);
 break;
+default:
+val = 0;
+break;
 }
 return val;
 }
-- 
2.17.1

[PATCH v2 0/2] virtio-pci: add check for vdev in virtio_pci_isr_read

2021-03-15 Thread Yuri Benditovich

This commit completes the solution of segfault in hot unplug flow
(by commit ccec7e9603f446fe75c6c563ba335c00cfda6a06).
Added missing check for vdev in virtio_pci_isr_read.

v1->v2:
Added crash stack
Updated commit comment
Cosmetic change in additional procedure in this file per request
of Philippe Mathieu-Daude

Yuri Benditovich (2):
  virtio-pci: add check for vdev in virtio_pci_isr_read
  virtio-pci: remove explicit initialization of val

 hw/virtio/virtio-pci.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

-- 
2.17.1

Re: [PATCH 0/3] virtio-net: graceful drop of vhost for TAP

2021-02-18 Thread Yuri Benditovich

On Thu, Feb 18, 2021 at 11:35 AM Daniel P. Berrangé 
wrote:

> On Wed, Feb 10, 2021 at 02:19:59PM +0800, Jason Wang wrote:
> >
> > On 2021/2/9 下午11:04, Michael S. Tsirkin wrote:
> > > On Tue, Feb 09, 2021 at 02:51:05PM +0000, Daniel P. Berrangé wrote:
> > > > On Tue, Feb 09, 2021 at 09:34:20AM -0500, Michael S. Tsirkin wrote:
> > > > > On Thu, Feb 04, 2021 at 10:29:12PM +0200, Yuri Benditovich wrote:
> > > > > > This set of patches introduces graceful switch from tap-vhost to
> > > > > > tap-no-vhost depending on guest features. Before that the
> features
> > > > > > that vhost does not support were silently cleared in
> get_features.
> > > > > > This creates potential problem of migration from the machine
> where
> > > > > > some of virtio-net features are supported by the vhost kernel to
> the
> > > > > > machine where they are not supported (packed ring as an example).
> > > > > I still worry that adding new features will silently disable vhost
> for people.
> > > > > Can we limit the change to when a VM is migrated in?
> > > > Some management applications expect bi-directional live migration to
> > > > work, so taking specific actions on incoming migration only feels
> > > > dangerous.
> > > Could you be more specific?
> > >
> > > Bi-directional migration is currently broken
> > > when migrating new kernel->old kernel.
> > >
> > > This seems to be the motivation for this patch, though I wish
> > > it was spelled out more explicitly.
> > >
> > > People don't complain much, but I'm fine with fixing that
> > > with a userspace fallback.
> > >
> > >
> > > I'd rather not force the fallback on others though: vhost is generally
> > > specified explicitly by user while features are generally set
> > > automatically, so this patch will make us override what user specified,
> > > not nice.
> > >
> > >
> > > > IMHO if the features we're adding cannot be expected to exist in
> > > > host kernels in general, then the feature should default to off
> > > > and require explicit user config to enable.
> > > > Downstream distros which can guarantee newer kernels can flip the
> > > > default in their custom machine types if they desire.
> > > >
> > > > Regards,
> > > > Daniel
> > > Unfortunately that will basically mean we are stuck with no new
> features
> > > for years. We did what this patch is trying to change for years now, in
> > > particular KVM also seems to happily disable CPU features not supported
> > > by kernel so I wonder why we can't keep doing it, with tweaks for some
> > > corner cases.
> >
> >
> > It's probably not the corner case.
> >
> > So my understanding is when a feature is turned on via command line, it
> > should not be cleared silently otherwise we may break migration for sure.
> >
> > E.g when packed=on is specified, we should disable vhost instead of
> clear it
> > from the device.
>
> If something is explicitly turned on by the user, they expect that feature
> to be honoured, or an error to be raised.
>
> If something is not explicitly turned on by the user, the behaviour wrt the
> default should be stable for any given machine type version.
>
> IOW, if you disable vhost by default when packed=on is set, then you can't
> later switch to letting vhost be enabled with packed=on, unless you tie
> that change to a new machine type.
>
> If the user has explicitly said packed=on *and* vhost=on, then we
> must honour that, or raise an error if the combination is unsupportable.
> Silently disabling vhost when vhost=on is set is not ok.
>

If I'm not mistaken:
Inside qemu there is no possibility to determine whether the user
explicitly turned vhost on.
For qemu the vhost is off by default but libvirt creates a new profile with
vhost on.


>
> Regards,
> Daniel
> --
> |: https://berrange.com  -o-
> https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o-
> https://fstop138.berrange.com :|
> |: https://entangle-photo.org-o-
> https://www.instagram.com/dberrange :|
>
>

[PATCH] virtio-pci: add check for vdev in virtio_pci_isr_read

2021-02-15 Thread Yuri Benditovich

https://bugzilla.redhat.com/show_bug.cgi?id=1743098
A check for vdev is missing in this procedure.
QEMU crashes in it during the hot unplug flow.

Signed-off-by: Yuri Benditovich 
---
 hw/virtio/virtio-pci.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 094c36aa3e..2f19301267 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1364,7 +1364,13 @@ static uint64_t virtio_pci_isr_read(void *opaque, hwaddr 
addr,
 {
 VirtIOPCIProxy *proxy = opaque;
 VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
-uint64_t val = qatomic_xchg(&vdev->isr, 0);
+uint64_t val = 0;
+
+if (vdev == NULL) {
+return val;
+}
+
+val = qatomic_xchg(&vdev->isr, 0);
 pci_irq_deassert(&proxy->pci_dev);
 
 return val;
-- 
2.17.1

Re: [PATCH 3/3] virtio-net: graceful fallback to vhost=off for tap netdev

2021-02-08 Thread Yuri Benditovich

On Mon, Feb 8, 2021 at 5:15 AM Jason Wang  wrote:
>
>
> On 2021/2/5 下午9:38, Michael S. Tsirkin wrote:
> > On Thu, Feb 04, 2021 at 10:29:15PM +0200, Yuri Benditovich wrote:
> >> Currently virtio-net silently clears features if they are
> >> not supported by respective vhost. This may create migration
> >> problems in future if vhost features on the source and destination
> >> are different. Implement graceful fallback to no-vhost mode
> >> when some acked features contradict with vhost. The decision is
> >> taken on set_features call and the vhost will be disabled
> >> till next reset (or migration).
> >> Such fallback is currently enabled only for TAP netdev.
> >>
> >> Signed-off-by: Yuri Benditovich
> > Sounds good, but I don't think we should do this if
> > vhostforce=on is set.
>
>
> If we do this, does it mean we won't maintain migration compatibility
> when vhostforce is on?

AFAIU, the 'vhostforce=on' should mean the vhost can't be disabled (if
I'm not mistaken this is typically used for vhost-user).
So we can view this case as similar to vhost-vdpa and vhost-user.

>
> Thanks
>
>
> >
> > Also, let's document this behaviour with the vhost option so people
> > are not surprised.
> >
>

Re: [PATCH 3/3] virtio-net: graceful fallback to vhost=off for tap netdev

2021-02-08 Thread Yuri Benditovich

On Mon, Feb 8, 2021 at 6:11 AM Jason Wang  wrote:
>
>
> On 2021/2/5 上午4:29, Yuri Benditovich wrote:
> > Currently virtio-net silently clears features if they are
> > not supported by respective vhost. This may create migration
> > problems in future if vhost features on the source and destination
> > are different. Implement graceful fallback to no-vhost mode
> > when some acked features contradict with vhost. The decision is
> > taken on set_features call and the vhost will be disabled
> > till next reset (or migration).
> > Such fallback is currently enabled only for TAP netdev.
> >
> > Signed-off-by: Yuri Benditovich 
> > ---
> >   hw/net/virtio-net.c | 58 ++---
> >   1 file changed, 50 insertions(+), 8 deletions(-)
> >
> > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> > index 5150f295e8..b353060e63 100644
> > --- a/hw/net/virtio-net.c
> > +++ b/hw/net/virtio-net.c
> > @@ -515,6 +515,15 @@ static RxFilterInfo 
> > *virtio_net_query_rxfilter(NetClientState *nc)
> >   return info;
> >   }
> >
> > +static void virtio_net_allow_vhost(VirtIONet *n, bool allow)
> > +{
> > +int i;
> > +for (i = 0; i < n->max_queues; i++) {
> > +NetClientState *nc = qemu_get_subqueue(n->nic, i)->peer;
> > +nc->vhost_net_disabled = !allow;
> > +}
> > +}
> > +
> >   static void virtio_net_reset(VirtIODevice *vdev)
> >   {
> >   VirtIONet *n = VIRTIO_NET(vdev);
> > @@ -552,6 +561,7 @@ static void virtio_net_reset(VirtIODevice *vdev)
> >   assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
> >   }
> >   }
> > +virtio_net_allow_vhost(n, true);
> >   }
> >
> >   static void peer_test_vnet_hdr(VirtIONet *n)
> > @@ -689,6 +699,15 @@ static void virtio_net_set_queues(VirtIONet *n)
> >   }
> >   }
> >
> > +static bool can_disable_vhost(VirtIONet *n)
> > +{
> > +NetClientState *peer = qemu_get_queue(n->nic)->peer;
> > +if (!get_vhost_net(peer)) {
> > +return false;
> > +}
> > +return !peer || peer->info->type == NET_CLIENT_DRIVER_TAP;
> > +}
> > +
> >   static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
> >
> >   static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t 
> > features,
> > @@ -725,14 +744,14 @@ static uint64_t virtio_net_get_features(VirtIODevice 
> > *vdev, uint64_t features,
> >   return features;
> >   }
> >
> > -virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
> > -virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
> > -features = vhost_net_get_features(get_vhost_net(nc->peer), features);
> > -vdev->backend_features = features;
> > +vdev->backend_features = 
> > vhost_net_get_features(get_vhost_net(nc->peer), features);
> >
> > -if (n->mtu_bypass_backend &&
> > -(n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
> > -features |= (1ULL << VIRTIO_NET_F_MTU);
> > +if (!can_disable_vhost(n)) {
> > +features = vdev->backend_features;
> > +if (n->mtu_bypass_backend &&
> > +(n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
> > +features |= (1ULL << VIRTIO_NET_F_MTU);
> > +}
> >   }
> >
> >   return features;
> > @@ -872,10 +891,25 @@ static void failover_add_primary(VirtIONet *n, Error 
> > **errp)
> >   error_propagate(errp, err);
> >   }
> >
> > +static bool check_vhost_features(VirtIONet *n, uint64_t features)
> > +{
> > +NetClientState *nc = qemu_get_queue(n->nic);
> > +uint64_t filtered;
> > +if (n->rss_data.redirect) {
> > +return false;
> > +}
> > +filtered = vhost_net_get_features(get_vhost_net(nc->peer), features);
> > +if (filtered != features) {
> > +return false;
> > +}
> > +return true;
> > +}
> > +
> >   static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
> >   {
> >   VirtIONet *n = VIRTIO_NET(vdev);
> >   Error *err = NULL;
> > +bool disable_vhost = false;
> >   int i;
> >
> >   if (n->mtu_bypass_backend &&
> > @@ -894,13 +928,21 @@ static void virtio_net_set_features(VirtIODevice 
> > *vdev, uint64_t features)
> >

[PATCH 3/3] virtio-net: graceful fallback to vhost=off for tap netdev

2021-02-04 Thread Yuri Benditovich

Currently virtio-net silently clears features if they are
not supported by respective vhost. This may create migration
problems in future if vhost features on the source and destination
are different. Implement graceful fallback to no-vhost mode
when some acked features contradict with vhost. The decision is
taken on set_features call and the vhost will be disabled
till next reset (or migration).
Such fallback is currently enabled only for TAP netdev.

Signed-off-by: Yuri Benditovich 
---
 hw/net/virtio-net.c | 58 ++---
 1 file changed, 50 insertions(+), 8 deletions(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 5150f295e8..b353060e63 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -515,6 +515,15 @@ static RxFilterInfo 
*virtio_net_query_rxfilter(NetClientState *nc)
 return info;
 }
 
+static void virtio_net_allow_vhost(VirtIONet *n, bool allow)
+{
+int i;
+for (i = 0; i < n->max_queues; i++) {
+NetClientState *nc = qemu_get_subqueue(n->nic, i)->peer;
+nc->vhost_net_disabled = !allow;
+}
+}
+
 static void virtio_net_reset(VirtIODevice *vdev)
 {
 VirtIONet *n = VIRTIO_NET(vdev);
@@ -552,6 +561,7 @@ static void virtio_net_reset(VirtIODevice *vdev)
 assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
 }
 }
+virtio_net_allow_vhost(n, true);
 }
 
 static void peer_test_vnet_hdr(VirtIONet *n)
@@ -689,6 +699,15 @@ static void virtio_net_set_queues(VirtIONet *n)
 }
 }
 
+static bool can_disable_vhost(VirtIONet *n)
+{
+NetClientState *peer = qemu_get_queue(n->nic)->peer;
+if (!get_vhost_net(peer)) {
+return false;
+}
+return !peer || peer->info->type == NET_CLIENT_DRIVER_TAP;
+}
+
 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
 
 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
@@ -725,14 +744,14 @@ static uint64_t virtio_net_get_features(VirtIODevice 
*vdev, uint64_t features,
 return features;
 }
 
-virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
-virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
-features = vhost_net_get_features(get_vhost_net(nc->peer), features);
-vdev->backend_features = features;
+vdev->backend_features = vhost_net_get_features(get_vhost_net(nc->peer), 
features);
 
-if (n->mtu_bypass_backend &&
-(n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
-features |= (1ULL << VIRTIO_NET_F_MTU);
+if (!can_disable_vhost(n)) {
+features = vdev->backend_features;
+if (n->mtu_bypass_backend &&
+(n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
+features |= (1ULL << VIRTIO_NET_F_MTU);
+}
 }
 
 return features;
@@ -872,10 +891,25 @@ static void failover_add_primary(VirtIONet *n, Error 
**errp)
 error_propagate(errp, err);
 }
 
+static bool check_vhost_features(VirtIONet *n, uint64_t features)
+{
+NetClientState *nc = qemu_get_queue(n->nic);
+uint64_t filtered;
+if (n->rss_data.redirect) {
+return false;
+}
+filtered = vhost_net_get_features(get_vhost_net(nc->peer), features);
+if (filtered != features) {
+return false;
+}
+return true;
+}
+
 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
 {
 VirtIONet *n = VIRTIO_NET(vdev);
 Error *err = NULL;
+bool disable_vhost = false;
 int i;
 
 if (n->mtu_bypass_backend &&
@@ -894,13 +928,21 @@ static void virtio_net_set_features(VirtIODevice *vdev, 
uint64_t features)
   VIRTIO_F_VERSION_1),
virtio_has_feature(features,
   VIRTIO_NET_F_HASH_REPORT));
-
 n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
 n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
 n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);
 
+if (can_disable_vhost(n)) {
+disable_vhost = !check_vhost_features(n, features);
+}
+if (disable_vhost) {
+warn_report("Some of requested features aren't supported by vhost, "
+"vhost is turned off till next reset");
+virtio_net_allow_vhost(n, false);
+}
+
 if (n->has_vnet_hdr) {
 n->curr_guest_offloads =
 virtio_net_guest_offloads_by_features(features);
-- 
2.17.1

[PATCH 2/3] net: add ability to hide (disable) vhost_net

2021-02-04 Thread Yuri Benditovich

If 'vhost_net_disabled' is set in the NetClientState of the
net device, get_vhost_net for TAP returns NULL. Network adapters
can use this ability to hide the vhost_net temporarily between
resets in case some active features conflict with vhost.

Signed-off-by: Yuri Benditovich 
---
 hw/net/vhost_net.c | 4 +++-
 include/net/net.h  | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 8282e440bd..7873d27a36 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -437,7 +437,9 @@ VHostNetState *get_vhost_net(NetClientState *nc)
 
 switch (nc->info->type) {
 case NET_CLIENT_DRIVER_TAP:
-vhost_net = tap_get_vhost_net(nc);
+if (!nc->vhost_net_disabled) {
+vhost_net = tap_get_vhost_net(nc);
+}
 break;
 #ifdef CONFIG_VHOST_NET_USER
 case NET_CLIENT_DRIVER_VHOST_USER:
diff --git a/include/net/net.h b/include/net/net.h
index 919facaad2..4479bdcec0 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -100,6 +100,7 @@ struct NetClientState {
 int vring_enable;
 int vnet_hdr_len;
 bool is_netdev;
+bool vhost_net_disabled;
 QTAILQ_HEAD(, NetFilterState) filters;
 };
 
-- 
2.17.1

[PATCH 0/3] virtio-net: graceful drop of vhost for TAP

2021-02-04 Thread Yuri Benditovich

This set of patches introduces graceful switch from tap-vhost to
tap-no-vhost depending on guest features. Before that the features
that vhost does not support were silently cleared in get_features.
This creates potential problem of migration from the machine where
some of virtio-net features are supported by the vhost kernel to the
machine where they are not supported (packed ring as an example).

Instead of silent masking of the features virtio-net gracefully
disables the vhost at set_features if some features acked by the
guest contradict with kernel vhost capabilities.

This set of patches also makes the get_vhost_net() call (which is used
everywhere) always return the actual result, i.e. initially it
returns a non-NULL value and from the moment the vhost is disabled
the call returns NULL. This way we avoid any unexpected
calls to vhost functions.
Yuri Benditovich (3):
  vhost-net: add VIRTIO_NET_F_HASH_REPORT to the list of kernel features
  net: add ability to hide (disable) vhost_net
  virtio-net: graceful fallback to vhost=off for tap netdev

 hw/net/vhost_net.c  |  5 +++-
 hw/net/virtio-net.c | 58 ++---
 include/net/net.h   |  1 +
 3 files changed, 55 insertions(+), 9 deletions(-)

-- 
2.17.1

[PATCH 1/3] vhost-net: add VIRTIO_NET_F_HASH_REPORT to the list of kernel features

2021-02-04 Thread Yuri Benditovich

In case of vhost TAP the kernel must support this feature,
otherwise the device can't offer it.

Signed-off-by: Yuri Benditovich 
---
 hw/net/vhost_net.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 24d555e764..8282e440bd 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -45,6 +45,7 @@ static const int kernel_feature_bits[] = {
 VIRTIO_NET_F_MTU,
 VIRTIO_F_IOMMU_PLATFORM,
 VIRTIO_F_RING_PACKED,
+VIRTIO_NET_F_HASH_REPORT,
 VHOST_INVALID_FEATURE_BIT
 };
 
-- 
2.17.1

Re: [RFC PATCH v3 4/6] ebpf: Added eBPF RSS loader.

2021-01-25 Thread Yuri Benditovich

On Mon, Jan 25, 2021 at 11:03 AM Jason Wang  wrote:
>
>
> On 2021/1/19 下午10:53, Yuri Benditovich wrote:
> > On Fri, Jan 15, 2021 at 9:02 AM Jason Wang  wrote:
> >>
> >> On 2021/1/15 上午5:16, Andrew Melnychenko wrote:
> >>> From: Andrew 
> >>>
> >>> Added function that loads RSS eBPF program.
> >>> Added stub functions for RSS eBPF loader.
> >>> Added meson and configuration options.
> >>>
> >>> By default, eBPF feature enabled if libbpf is present in the build system.
> >>> libbpf checked in configuration shell script and meson script.
> >>>
> >>> Signed-off-by: Yuri Benditovich 
> >>> Signed-off-by: Andrew Melnychenko 
> >>> ---
> >>>configure   |  33 
> >>>ebpf/ebpf_rss-stub.c|  40 
> >>>ebpf/ebpf_rss.c | 165 +
> >>>ebpf/ebpf_rss.h |  44 +
> >>>ebpf/meson.build|   1 +
> >>>ebpf/rss.bpf.skeleton.h | 397 
> >>>ebpf/trace-events   |   4 +
> >>>ebpf/trace.h|   2 +
> >>>meson.build |  13 ++
> >>>9 files changed, 699 insertions(+)
> >>>create mode 100644 ebpf/ebpf_rss-stub.c
> >>>create mode 100644 ebpf/ebpf_rss.c
> >>>create mode 100644 ebpf/ebpf_rss.h
> >>>create mode 100644 ebpf/meson.build
> >>>create mode 100644 ebpf/rss.bpf.skeleton.h
> >>>create mode 100644 ebpf/trace-events
> >>>create mode 100644 ebpf/trace.h
> >>>
> >>> diff --git a/configure b/configure
> >>> index 5860bdb77b..9d18e941f5 100755
> >>> --- a/configure
> >>> +++ b/configure
> >>> @@ -342,6 +342,7 @@ vhost_vsock="$default_feature"
> >>>vhost_user="no"
> >>>vhost_user_blk_server="auto"
> >>>vhost_user_fs="$default_feature"
> >>> +bpf=""
> >>>kvm="auto"
> >>>hax="auto"
> >>>hvf="auto"
> >>> @@ -1236,6 +1237,10 @@ for opt do
> >>>  ;;
> >>>  --enable-membarrier) membarrier="yes"
> >>>  ;;
> >>> +  --disable-bpf) bpf="no"
> >>> +  ;;
> >>> +  --enable-bpf) bpf="yes"
> >>> +  ;;
> >>>  --disable-blobs) blobs="false"
> >>>  ;;
> >>>  --with-pkgversion=*) pkgversion="$optarg"
> >>> @@ -1845,6 +1850,7 @@ disabled with --disable-FEATURE, default is enabled 
> >>> if available
> >>>  vhost-user  vhost-user backend support
> >>>  vhost-user-blk-servervhost-user-blk server support
> >>>  vhost-vdpa  vhost-vdpa kernel backend support
> >>> +  bpf BPF kernel support
> >>>  spice   spice
> >>>  rbd rados block device (rbd)
> >>>  libiscsiiscsi support
> >>> @@ -5057,6 +5063,30 @@ else
> >>>membarrier=no
> >>>fi
> >>>
> >>> +##
> >>> +# check for usable bpf system call
> >>> +if test "$bpf" = ""; then
> >>
> >> This implies the bpf is enabled by default?
> > Yes, assuming libbpf-devel present and bpf system call defined.
> >
> > Any problem with it?
>
>
> It means the configure will fail if libbpf is not installed. Considering
> that libbpf is not very common at the current stage, I think it's better
> to make it auto or disabled by default.
>
>
> >
> >>
> >>> +have_bpf=no
> >>> +if test "$linux" = "yes" -a "$bigendian" != "yes"; then
> >>> +cat > $TMPC << EOF
> >>> +#include 
> >>> +#include 
> >>> +int main(void) {
> >>> +struct bpf_object *obj = NULL;
> >>> +bpf_object__load(obj);
> >>> +exit(0);
> >>> +}
> >>> +EOF
> >>> +if compile_prog "" "-lbpf" ; then
> >>> +have_bpf=yes
> >>> +bpf=yes
> >>> +fi
> >>> +fi
> >>> +if test "$have_bpf" = "

Re: [RFC PATCH v3 5/6] virtio-net: Added eBPF RSS to virtio-net.

2021-01-24 Thread Yuri Benditovich

On Sun, Jan 24, 2021 at 10:24 AM Yuri Benditovich
 wrote:
>
> Hi Jason,
>
> I've prepared a POC of graceful switch to 'vhost off' if respective
> features are acked by the guest.
> This way we do not need to silently clear RSS and hash report
> features in case of 'vhost on'.
> Can you please review it and provide your feedback?
>
> I think the only open question is what to do with cases of vhost-user
> and vhost-vdpa.
>
> https://github.com/qemu/qemu/pull/105
> This pull request is for reviews only.

Unfortunately, the QEMU GitHub PR is closed for comments.
This is the link to the same PR in the Daynix repository:

https://github.com/daynix/qemu/pull/1

>
> Thanks in advance
>
>
>
>
>
>
> On Mon, Jan 18, 2021 at 5:16 AM Jason Wang  wrote:
> >
> >
> > On 2021/1/17 下午5:04, Yuri Benditovich wrote:
> > > On Fri, Jan 15, 2021 at 9:20 AM Jason Wang  wrote:
> > >> On 2021/1/15 上午5:16, Andrew Melnychenko wrote:
> > >>> From: Andrew
> > >>>
> > >>> When RSS is enabled the device tries to load the eBPF program
> > >>> to select RX virtqueue in the TUN. If eBPF can be loaded
> > >>> the RSS will function also with vhost (works with kernel 5.8 and later).
> > >>> Software RSS is used as a fallback with vhost=off when eBPF can't be 
> > >>> loaded
> > >>> or when hash population requested by the guest.
> > >>>
> > >>> Signed-off-by: Yuri Benditovich
> > >>> Signed-off-by: Andrew Melnychenko
> > >>> ---
> > >>>hw/net/vhost_net.c |   2 +
> > >>>hw/net/virtio-net.c| 125 
> > >>> +++--
> > >>>include/hw/virtio/virtio-net.h |   4 ++
> > >>>net/vhost-vdpa.c   |   2 +
> > >>>4 files changed, 129 insertions(+), 4 deletions(-)
> > >>>
> > >>> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > >>> index 24d555e764..16124f99c3 100644
> > >>> --- a/hw/net/vhost_net.c
> > >>> +++ b/hw/net/vhost_net.c
> > >>> @@ -71,6 +71,8 @@ static const int user_feature_bits[] = {
> > >>>VIRTIO_NET_F_MTU,
> > >>>VIRTIO_F_IOMMU_PLATFORM,
> > >>>VIRTIO_F_RING_PACKED,
> > >>> +VIRTIO_NET_F_RSS,
> > >>> +VIRTIO_NET_F_HASH_REPORT,
> > >>>
> > >>>/* This bit implies RARP isn't sent by QEMU out of band */
> > >>>VIRTIO_NET_F_GUEST_ANNOUNCE,
> > >>> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> > >>> index 09ceb02c9d..37016fc73a 100644
> > >>> --- a/hw/net/virtio-net.c
> > >>> +++ b/hw/net/virtio-net.c
> > >>> @@ -691,6 +691,19 @@ static void virtio_net_set_queues(VirtIONet *n)
> > >>>
> > >>>static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
> > >>>
> > >>> +static uint64_t fix_ebpf_vhost_features(uint64_t features)
> > >>> +{
> > >>> +/* If vhost=on & CONFIG_EBPF doesn't set - disable RSS feature */
> > >> I still think we should not clear feature silently. This may break
> > >> migration if the feature is cleared on destination.
> > > Do I understand it correctly that if we do not clear features silently
> > > and implement a graceful drop to vhost=off when we can't do what we
> > > need with vhost - then we do not need to add any migration blocker?
> >
> >
> > Yes. I think we won't go with migration blocker since we need support
> > migration in the end.
> >
> > Thanks
> >
> >
> > >
> >

Re: [RFC PATCH v3 5/6] virtio-net: Added eBPF RSS to virtio-net.

2021-01-24 Thread Yuri Benditovich

Hi Jason,

I've prepared a POC of graceful switch to 'vhost off' if respective
features are acked by the guest.
This way we do not need to silently clear RSS and hash report
features in case of 'vhost on'.
Can you please review it and provide your feedback?

I think the only open question is what to do with cases of vhost-user
and vhost-vdpa.

https://github.com/qemu/qemu/pull/105
This pull request is for reviews only.

Thanks in advance






On Mon, Jan 18, 2021 at 5:16 AM Jason Wang  wrote:
>
>
> On 2021/1/17 下午5:04, Yuri Benditovich wrote:
> > On Fri, Jan 15, 2021 at 9:20 AM Jason Wang  wrote:
> >> On 2021/1/15 上午5:16, Andrew Melnychenko wrote:
> >>> From: Andrew
> >>>
> >>> When RSS is enabled the device tries to load the eBPF program
> >>> to select RX virtqueue in the TUN. If eBPF can be loaded
> >>> the RSS will function also with vhost (works with kernel 5.8 and later).
> >>> Software RSS is used as a fallback with vhost=off when eBPF can't be 
> >>> loaded
> >>> or when hash population requested by the guest.
> >>>
> >>> Signed-off-by: Yuri Benditovich
> >>> Signed-off-by: Andrew Melnychenko
> >>> ---
> >>>hw/net/vhost_net.c |   2 +
> >>>hw/net/virtio-net.c| 125 +++--
> >>>include/hw/virtio/virtio-net.h |   4 ++
> >>>net/vhost-vdpa.c   |   2 +
> >>>4 files changed, 129 insertions(+), 4 deletions(-)
> >>>
> >>> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> >>> index 24d555e764..16124f99c3 100644
> >>> --- a/hw/net/vhost_net.c
> >>> +++ b/hw/net/vhost_net.c
> >>> @@ -71,6 +71,8 @@ static const int user_feature_bits[] = {
> >>>VIRTIO_NET_F_MTU,
> >>>VIRTIO_F_IOMMU_PLATFORM,
> >>>VIRTIO_F_RING_PACKED,
> >>> +VIRTIO_NET_F_RSS,
> >>> +VIRTIO_NET_F_HASH_REPORT,
> >>>
> >>>/* This bit implies RARP isn't sent by QEMU out of band */
> >>>VIRTIO_NET_F_GUEST_ANNOUNCE,
> >>> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> >>> index 09ceb02c9d..37016fc73a 100644
> >>> --- a/hw/net/virtio-net.c
> >>> +++ b/hw/net/virtio-net.c
> >>> @@ -691,6 +691,19 @@ static void virtio_net_set_queues(VirtIONet *n)
> >>>
> >>>static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
> >>>
> >>> +static uint64_t fix_ebpf_vhost_features(uint64_t features)
> >>> +{
> >>> +/* If vhost=on & CONFIG_EBPF doesn't set - disable RSS feature */
> >> I still think we should not clear feature silently. This may break
> >> migration if the feature is cleared on destination.
> > Do I understand it correctly that if we do not clear features silently
> > and implement a graceful drop to vhost=off when we can't do what we
> > need with vhost - then we do not need to add any migration blocker?
>
>
> Yes. I think we won't go with migration blocker since we need support
> migration in the end.
>
> Thanks
>
>
> >
>

Re: [RFC PATCH v3 4/6] ebpf: Added eBPF RSS loader.

2021-01-19 Thread Yuri Benditovich

On Fri, Jan 15, 2021 at 9:02 AM Jason Wang  wrote:
>
>
> On 2021/1/15 上午5:16, Andrew Melnychenko wrote:
> > From: Andrew 
> >
> > Added function that loads RSS eBPF program.
> > Added stub functions for RSS eBPF loader.
> > Added meson and configuration options.
> >
> > By default, eBPF feature enabled if libbpf is present in the build system.
> > libbpf checked in configuration shell script and meson script.
> >
> > Signed-off-by: Yuri Benditovich 
> > Signed-off-by: Andrew Melnychenko 
> > ---
> >   configure   |  33 
> >   ebpf/ebpf_rss-stub.c|  40 
> >   ebpf/ebpf_rss.c | 165 +
> >   ebpf/ebpf_rss.h |  44 +
> >   ebpf/meson.build|   1 +
> >   ebpf/rss.bpf.skeleton.h | 397 
> >   ebpf/trace-events   |   4 +
> >   ebpf/trace.h|   2 +
> >   meson.build |  13 ++
> >   9 files changed, 699 insertions(+)
> >   create mode 100644 ebpf/ebpf_rss-stub.c
> >   create mode 100644 ebpf/ebpf_rss.c
> >   create mode 100644 ebpf/ebpf_rss.h
> >   create mode 100644 ebpf/meson.build
> >   create mode 100644 ebpf/rss.bpf.skeleton.h
> >   create mode 100644 ebpf/trace-events
> >   create mode 100644 ebpf/trace.h
> >
> > diff --git a/configure b/configure
> > index 5860bdb77b..9d18e941f5 100755
> > --- a/configure
> > +++ b/configure
> > @@ -342,6 +342,7 @@ vhost_vsock="$default_feature"
> >   vhost_user="no"
> >   vhost_user_blk_server="auto"
> >   vhost_user_fs="$default_feature"
> > +bpf=""
> >   kvm="auto"
> >   hax="auto"
> >   hvf="auto"
> > @@ -1236,6 +1237,10 @@ for opt do
> > ;;
> > --enable-membarrier) membarrier="yes"
> > ;;
> > +  --disable-bpf) bpf="no"
> > +  ;;
> > +  --enable-bpf) bpf="yes"
> > +  ;;
> > --disable-blobs) blobs="false"
> > ;;
> > --with-pkgversion=*) pkgversion="$optarg"
> > @@ -1845,6 +1850,7 @@ disabled with --disable-FEATURE, default is enabled 
> > if available
> > vhost-user  vhost-user backend support
> > vhost-user-blk-servervhost-user-blk server support
> > vhost-vdpa  vhost-vdpa kernel backend support
> > +  bpf BPF kernel support
> > spice   spice
> > rbd rados block device (rbd)
> > libiscsiiscsi support
> > @@ -5057,6 +5063,30 @@ else
> >   membarrier=no
> >   fi
> >
> > +##
> > +# check for usable bpf system call
> > +if test "$bpf" = ""; then
>
>
> This implies the bpf is enabled by default?

Yes, assuming libbpf-devel is present and the bpf system call is defined.

Any problem with it?

>
>
> > +have_bpf=no
> > +if test "$linux" = "yes" -a "$bigendian" != "yes"; then
> > +cat > $TMPC << EOF
> > +#include 
> > +#include 
> > +int main(void) {
> > +struct bpf_object *obj = NULL;
> > +bpf_object__load(obj);
> > +exit(0);
> > +}
> > +EOF
> > +if compile_prog "" "-lbpf" ; then
> > +have_bpf=yes
> > +bpf=yes
> > +fi
> > +fi
> > +if test "$have_bpf" = "no"; then
> > +  feature_not_found "bpf" "the libbpf is not available"
> > +fi
> > +fi
> > +
> >   ######
> >   # check if rtnetlink.h exists and is useful
> >   have_rtnetlink=no
> > @@ -5905,6 +5935,9 @@ fi
> >   if test "$membarrier" = "yes" ; then
> > echo "CONFIG_MEMBARRIER=y" >> $config_host_mak
> >   fi
> > +if test "$bpf" = "yes" -a "$bigendian" != "yes" -a "$linux" = "yes" ; then
> > +  echo "CONFIG_EBPF=y" >> $config_host_mak
> > +fi
> >   if test "$signalfd" = "yes" ; then
> > echo "CONFIG_SIGNALFD=y" >> $config_host_mak
> >   fi
> > diff --git a/ebpf/ebpf_rss-stub.c b/ebpf/ebpf_rss-stub.c
> > new file mode 100644
> > index 00..e71e229190
> > --- /dev/null
> > +++ b/ebpf/ebpf_rss-stub.c
> > @@ -0,0 +1,40 @@
> > +/*

Re: [RFC PATCH v3 5/6] virtio-net: Added eBPF RSS to virtio-net.

2021-01-17 Thread Yuri Benditovich

On Fri, Jan 15, 2021 at 9:20 AM Jason Wang  wrote:
>
>
> On 2021/1/15 上午5:16, Andrew Melnychenko wrote:
> > From: Andrew 
> >
> > When RSS is enabled the device tries to load the eBPF program
> > to select RX virtqueue in the TUN. If eBPF can be loaded
> > the RSS will function also with vhost (works with kernel 5.8 and later).
> > Software RSS is used as a fallback with vhost=off when eBPF can't be loaded
> > or when hash population requested by the guest.
> >
> > Signed-off-by: Yuri Benditovich 
> > Signed-off-by: Andrew Melnychenko 
> > ---
> >   hw/net/vhost_net.c |   2 +
> >   hw/net/virtio-net.c| 125 +++--
> >   include/hw/virtio/virtio-net.h |   4 ++
> >   net/vhost-vdpa.c   |   2 +
> >   4 files changed, 129 insertions(+), 4 deletions(-)
> >
> > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > index 24d555e764..16124f99c3 100644
> > --- a/hw/net/vhost_net.c
> > +++ b/hw/net/vhost_net.c
> > @@ -71,6 +71,8 @@ static const int user_feature_bits[] = {
> >   VIRTIO_NET_F_MTU,
> >   VIRTIO_F_IOMMU_PLATFORM,
> >   VIRTIO_F_RING_PACKED,
> > +VIRTIO_NET_F_RSS,
> > +VIRTIO_NET_F_HASH_REPORT,
> >
> >   /* This bit implies RARP isn't sent by QEMU out of band */
> >   VIRTIO_NET_F_GUEST_ANNOUNCE,
> > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> > index 09ceb02c9d..37016fc73a 100644
> > --- a/hw/net/virtio-net.c
> > +++ b/hw/net/virtio-net.c
> > @@ -691,6 +691,19 @@ static void virtio_net_set_queues(VirtIONet *n)
> >
> >   static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
> >
> > +static uint64_t fix_ebpf_vhost_features(uint64_t features)
> > +{
> > +/* If vhost=on & CONFIG_EBPF doesn't set - disable RSS feature */
>
>
> > I still think we should not clear the feature silently. This may break
> > migration if the feature is cleared on destination.

Do I understand it correctly that if we do not clear features silently
and implement a graceful drop to vhost=off when we can't do what we
need with vhost - then we do not need to add any migration blocker?

>
>
> > +uint64_t ret = features;
> > +#ifndef CONFIG_EBPF
> > > +virtio_clear_feature(&ret, VIRTIO_NET_F_RSS);
> > +#endif
> > +/* for now, there is no solution for populating the hash from eBPF */
> > > +virtio_clear_feature(&ret, VIRTIO_NET_F_HASH_REPORT);
> > +
> > +return ret;
> > +}
> > +
> >   static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t 
> > features,
> >   Error **errp)
> >   {
> > @@ -725,9 +738,9 @@ static uint64_t virtio_net_get_features(VirtIODevice 
> > *vdev, uint64_t features,
> >   return features;
> >   }
> >
> > > -virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
> > > -virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
> > -features = vhost_net_get_features(get_vhost_net(nc->peer), features);
> > +features = fix_ebpf_vhost_features(
> > +vhost_net_get_features(get_vhost_net(nc->peer), features));
> > +
> >   vdev->backend_features = features;
> >
> >   if (n->mtu_bypass_backend &&
> > @@ -1151,12 +1164,79 @@ static int virtio_net_handle_announce(VirtIONet *n, 
> > uint8_t cmd,
> >   }
> >   }
> >
> > +static void virtio_net_detach_epbf_rss(VirtIONet *n);
> > +
> >   static void virtio_net_disable_rss(VirtIONet *n)
> >   {
> >   if (n->rss_data.enabled) {
> >   trace_virtio_net_rss_disable();
> >   }
> >   n->rss_data.enabled = false;
> > +
> > +virtio_net_detach_epbf_rss(n);
> > +}
> > +
> > +static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
> > +{
> > +NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
> > +if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
> > +return false;
> > +}
> > +
> > +return nc->info->set_steering_ebpf(nc, prog_fd);
> > +}
> > +
> > +static void rss_data_to_rss_config(struct VirtioNetRssData *data,
> > +   struct EBPFRSSConfig *config)
> > +{
> > +config->redirect = data->redirect;
> > +config->populate_hash = data->populate_hash;
> > +config->hash_types = data->hash_types;
> > +config->indirections_len = data->indirections_len;
> > +

Re: [RFC PATCH v2 0/5] eBPF RSS support for virtio-net

2020-12-06 Thread Yuri Benditovich

On Fri, Dec 4, 2020 at 3:57 PM Toke Høiland-Jørgensen 
wrote:

> Yuri Benditovich  writes:
>
> > On Fri, Dec 4, 2020 at 12:09 PM Toke Høiland-Jørgensen 
> > wrote:
> >
> >> Yuri Benditovich  writes:
> >>
> >> > On Wed, Dec 2, 2020 at 4:18 PM Toke Høiland-Jørgensen <
> t...@redhat.com>
> >> > wrote:
> >> >
> >> >> Jason Wang  writes:
> >> >>
> >> >> > On 2020/11/19 下午7:13, Andrew Melnychenko wrote:
> >> >> >> This set of patches introduces the usage of eBPF for packet
> steering
> >> >> >> and RSS hash calculation:
> >> >> >> * RSS(Receive Side Scaling) is used to distribute network packets
> to
> >> >> >> guest virtqueues by calculating packet hash
> >> >> >> * Additionally adding support for the usage of RSS with vhost
> >> >> >>
> >> >> >> The eBPF works on kernels 5.8+
> >> >> >> On earlier kernels it fails to load and the RSS feature is
> reported
> >> >> >> only without vhost and implemented in 'in-qemu' software.
> >> >> >>
> >> >> >> Implementation notes:
> >> >> >> Linux TAP TUNSETSTEERINGEBPF ioctl was used to set the eBPF
> program.
> >> >> >> Added libbpf dependency and eBPF support.
> >> >> >> The eBPF program is part of the qemu and presented as an array
> >> >> >> of BPF ELF file data.
> >> >> >> The compilation of eBPF is not part of QEMU build and can be done
> >> >> >> using provided Makefile.ebpf(need to adjust 'linuxhdrs').
> >> >> >> Added changes to virtio-net and vhost, primary eBPF RSS is used.
> >> >> >> 'in-qemu' RSS used in the case of hash population and as a
> fallback
> >> >> option.
> >> >> >> For vhost, the hash population feature is not reported to the
> guest.
> >> >> >>
> >> >> >> Please also see the documentation in PATCH 5/5.
> >> >> >>
> >> >> >> I am sending those patches as RFC to initiate the discussions and
> get
> >> >> >> feedback on the following points:
> >> >> >> * Fallback when eBPF is not supported by the kernel
> >> >> >> * Live migration to the kernel that doesn't have eBPF support
> >> >> >> * Integration with current QEMU build
> >> >> >> * Additional usage for eBPF for packet filtering
> >> >> >>
> >> >> >> Known issues:
> >> >> >> * hash population not supported by eBPF RSS: 'in-qemu' RSS used
> >> >> >> as a fallback, also, hash population feature is not reported to
> >> guests
> >> >> >> with vhost.
> >> >> >> * big-endian BPF support: for now, eBPF isn't supported on
> >> >> >> big-endian systems. Can be added in future if required.
> >> >> >> * huge .h file with eBPF binary. The size of .h file containing
> >> >> >> eBPF binary is currently ~5K lines, because the binary is built
> with
> >> >> debug information.
> >> >> >> The binary without debug/BTF info can't be loaded by libbpf.
> >> >> >> We're looking for possibilities to reduce the size of the .h
> files.
> >> >> >
> >> >> >
> >> >> > Adding Toke for sharing more idea from eBPF side.
> >> >> >
> >> >> > We had some discussion on the eBPF issues:
> >> >> >
> >> >> > 1) Whether or not to use libbpf. Toke strongly suggest to use
> libbpf
> >> >> > 2) Whether or not to use BTF. Toke confirmed that if we don't
> access
> >> any
> >> >> > skb metadata, BTF is not strictly required for CO-RE. But it might
> >> still
> >> >> > useful for e.g debugging.
> >> >> > 3) About the huge (5K lines, see patch #2 Toke). Toke confirmed
> that
> >> we
> >> >> > can strip debug symbols, but Yuri found some sections can't be
> >> stripped,
> >> >> > we can keep discussing here.
> >> >>
> >> >> I just tried simply running 'strip' on a sample trivial XDP program,
> >> >> which brought its size down from ~5k to ~1k and preserved the BTF
> >> >> information w

Re: [RFC PATCH v2 0/5] eBPF RSS support for virtio-net

2020-12-04 Thread Yuri Benditovich

On Fri, Dec 4, 2020 at 12:09 PM Toke Høiland-Jørgensen 
wrote:

> Yuri Benditovich  writes:
>
> > On Wed, Dec 2, 2020 at 4:18 PM Toke Høiland-Jørgensen 
> > wrote:
> >
> >> Jason Wang  writes:
> >>
> >> > On 2020/11/19 下午7:13, Andrew Melnychenko wrote:
> >> >> This set of patches introduces the usage of eBPF for packet steering
> >> >> and RSS hash calculation:
> >> >> * RSS(Receive Side Scaling) is used to distribute network packets to
> >> >> guest virtqueues by calculating packet hash
> >> >> * Additionally adding support for the usage of RSS with vhost
> >> >>
> >> >> The eBPF works on kernels 5.8+
> >> >> On earlier kernels it fails to load and the RSS feature is reported
> >> >> only without vhost and implemented in 'in-qemu' software.
> >> >>
> >> >> Implementation notes:
> >> >> Linux TAP TUNSETSTEERINGEBPF ioctl was used to set the eBPF program.
> >> >> Added libbpf dependency and eBPF support.
> >> >> The eBPF program is part of the qemu and presented as an array
> >> >> of BPF ELF file data.
> >> >> The compilation of eBPF is not part of QEMU build and can be done
> >> >> using provided Makefile.ebpf(need to adjust 'linuxhdrs').
> >> >> Added changes to virtio-net and vhost, primary eBPF RSS is used.
> >> >> 'in-qemu' RSS used in the case of hash population and as a fallback
> >> option.
> >> >> For vhost, the hash population feature is not reported to the guest.
> >> >>
> >> >> Please also see the documentation in PATCH 5/5.
> >> >>
> >> >> I am sending those patches as RFC to initiate the discussions and get
> >> >> feedback on the following points:
> >> >> * Fallback when eBPF is not supported by the kernel
> >> >> * Live migration to the kernel that doesn't have eBPF support
> >> >> * Integration with current QEMU build
> >> >> * Additional usage for eBPF for packet filtering
> >> >>
> >> >> Known issues:
> >> >> * hash population not supported by eBPF RSS: 'in-qemu' RSS used
> >> >> as a fallback, also, hash population feature is not reported to
> guests
> >> >> with vhost.
> >> >> * big-endian BPF support: for now, eBPF isn't supported on
> >> >> big-endian systems. Can be added in future if required.
> >> >> * huge .h file with eBPF binary. The size of .h file containing
> >> >> eBPF binary is currently ~5K lines, because the binary is built with
> >> debug information.
> >> >> The binary without debug/BTF info can't be loaded by libbpf.
> >> >> We're looking for possibilities to reduce the size of the .h files.
> >> >
> >> >
> >> > Adding Toke for sharing more idea from eBPF side.
> >> >
> >> > We had some discussion on the eBPF issues:
> >> >
> >> > 1) Whether or not to use libbpf. Toke strongly suggest to use libbpf
> >> > 2) Whether or not to use BTF. Toke confirmed that if we don't access
> any
> >> > skb metadata, BTF is not strictly required for CO-RE. But it might
> still
> >> > useful for e.g debugging.
> >> > 3) About the huge (5K lines, see patch #2 Toke). Toke confirmed that
> we
> >> > can strip debug symbols, but Yuri found some sections can't be
> stripped,
> >> > we can keep discussing here.
> >>
> >> I just tried simply running 'strip' on a sample trivial XDP program,
> >> which brought its size down from ~5k to ~1k and preserved the BTF
> >> information without me having to do anything.
> >>
> >
> > With our eBPF code the numbers are slightly different:
> > The code size without BTF: 7.5K (built without '-g')
> > Built with '-g': 45K
> > Stripped: 19K
> > The difference between 7.5 and 19K still seems significant, especially
> when
> > we do not use any kernel structures and do not need these BTF sections
>
> That does seem like a lot of BTF information. Did you confirm (with
> objdump) that it's the .BTF* sections that take up these extra 12k? Do
> you have some really complicated data structures in the file or
> something? Got a link to the source somewhere that isn't a web mailing
> list archive? :)
>
>
Looks like the extra size is related to BTF: there are 4 BTF sections that
take 12.5K
  [ 7] .BTF  PROGBITS

Re: [RFC PATCH v2 0/5] eBPF RSS support for virtio-net

2020-12-03 Thread Yuri Benditovich

On Wed, Dec 2, 2020 at 4:18 PM Toke Høiland-Jørgensen 
wrote:

> Jason Wang  writes:
>
> > On 2020/11/19 下午7:13, Andrew Melnychenko wrote:
> >> This set of patches introduces the usage of eBPF for packet steering
> >> and RSS hash calculation:
> >> * RSS(Receive Side Scaling) is used to distribute network packets to
> >> guest virtqueues by calculating packet hash
> >> * Additionally adding support for the usage of RSS with vhost
> >>
> >> The eBPF works on kernels 5.8+
> >> On earlier kernels it fails to load and the RSS feature is reported
> >> only without vhost and implemented in 'in-qemu' software.
> >>
> >> Implementation notes:
> >> Linux TAP TUNSETSTEERINGEBPF ioctl was used to set the eBPF program.
> >> Added libbpf dependency and eBPF support.
> >> The eBPF program is part of the qemu and presented as an array
> >> of BPF ELF file data.
> >> The compilation of eBPF is not part of QEMU build and can be done
> >> using provided Makefile.ebpf(need to adjust 'linuxhdrs').
> >> Added changes to virtio-net and vhost, primary eBPF RSS is used.
> >> 'in-qemu' RSS used in the case of hash population and as a fallback
> option.
> >> For vhost, the hash population feature is not reported to the guest.
> >>
> >> Please also see the documentation in PATCH 5/5.
> >>
> >> I am sending those patches as RFC to initiate the discussions and get
> >> feedback on the following points:
> >> * Fallback when eBPF is not supported by the kernel
> >> * Live migration to the kernel that doesn't have eBPF support
> >> * Integration with current QEMU build
> >> * Additional usage for eBPF for packet filtering
> >>
> >> Known issues:
> >> * hash population not supported by eBPF RSS: 'in-qemu' RSS used
> >> as a fallback, also, hash population feature is not reported to guests
> >> with vhost.
> >> * big-endian BPF support: for now, eBPF isn't supported on
> >> big-endian systems. Can be added in future if required.
> >> * huge .h file with eBPF binary. The size of .h file containing
> >> eBPF binary is currently ~5K lines, because the binary is built with
> debug information.
> >> The binary without debug/BTF info can't be loaded by libbpf.
> >> We're looking for possibilities to reduce the size of the .h files.
> >
> >
> > Adding Toke for sharing more idea from eBPF side.
> >
> > We had some discussion on the eBPF issues:
> >
> > 1) Whether or not to use libbpf. Toke strongly suggest to use libbpf
> > 2) Whether or not to use BTF. Toke confirmed that if we don't access any
> > skb metadata, BTF is not strictly required for CO-RE. But it might still
> > useful for e.g debugging.
> > 3) About the huge (5K lines, see patch #2 Toke). Toke confirmed that we
> > can strip debug symbols, but Yuri found some sections can't be stripped,
> > we can keep discussing here.
>
> I just tried simply running 'strip' on a sample trivial XDP program,
> which brought its size down from ~5k to ~1k and preserved the BTF
> information without me having to do anything.
>

With our eBPF code the numbers are slightly different:
The code size without BTF: 7.5K (built without '-g')
Built with '-g': 45K
Stripped: 19K
The difference between 7.5 and 19K still seems significant, especially when
we do not use any kernel structures and do not need these BTF sections
This is the only reason to prefer the non-libbpf option for this specific eBPF



>
> As a side note, though, instead of embedding the BPF program into a .h,
> you could simply ship it as a .o and load it from the file system. We do
> that with xdp-tools and install the bpf object files into /usr/$LIB/bpf/.
>

Yes, we've discussed this option and decided to go with embedding the BPF
https://lists.gnu.org/archive/html/qemu-devel/2020-11/msg02157.html


> -Toke
>
>

Re: [RFC PATCH v2 4/5] virtio-net: Added eBPF RSS to virtio-net.

2020-12-01 Thread Yuri Benditovich

On Wed, Dec 2, 2020 at 6:06 AM Jason Wang  wrote:

>
> On 2020/12/1 下午3:40, Yuri Benditovich wrote:
> >
> >
> > On Tue, Nov 24, 2020 at 10:49 AM Jason Wang  > <mailto:jasow...@redhat.com>> wrote:
> >
> >
> > On 2020/11/19 下午7:13, Andrew Melnychenko wrote:
> > > From: Andrew mailto:and...@daynix.com>>
> > >
> > > When RSS is enabled the device tries to load the eBPF program
> > > to select RX virtqueue in the TUN. If eBPF can be loaded
> > > the RSS will function also with vhost (works with kernel 5.8 and
> > later).
> > > Software RSS is used as a fallback with vhost=off when eBPF
> > can't be loaded
> > > or when hash population requested by the guest.
> > >
> > > Signed-off-by: Yuri Benditovich  > <mailto:yuri.benditov...@daynix.com>>
> > > Signed-off-by: Andrew Melnychenko  > <mailto:and...@daynix.com>>
> > > ---
> > >   hw/net/vhost_net.c |   2 +
> > >   hw/net/virtio-net.c| 120
> > +++--
> > >   include/hw/virtio/virtio-net.h |   4 ++
> > >   net/vhost-vdpa.c   |   2 +
> > >   4 files changed, 124 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > > index 24d555e764..16124f99c3 100644
> > > --- a/hw/net/vhost_net.c
> > > +++ b/hw/net/vhost_net.c
> > > @@ -71,6 +71,8 @@ static const int user_feature_bits[] = {
> > >   VIRTIO_NET_F_MTU,
> > >   VIRTIO_F_IOMMU_PLATFORM,
> > >   VIRTIO_F_RING_PACKED,
> > > +VIRTIO_NET_F_RSS,
> > > +VIRTIO_NET_F_HASH_REPORT,
> > >
> > >   /* This bit implies RARP isn't sent by QEMU out of band */
> > >   VIRTIO_NET_F_GUEST_ANNOUNCE,
> > > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> > > index 277289d56e..afcc3032ec 100644
> > > --- a/hw/net/virtio-net.c
> > > +++ b/hw/net/virtio-net.c
> > > @@ -698,6 +698,19 @@ static void virtio_net_set_queues(VirtIONet
> *n)
> > >
> > >   static void virtio_net_set_multiqueue(VirtIONet *n, int
> > multiqueue);
> > >
> > > +static uint64_t fix_ebpf_vhost_features(uint64_t features)
> > > +{
> > > +/* If vhost=on & CONFIG_EBPF doesn't set - disable RSS
> > feature */
> > > +uint64_t ret = features;
> > > +#ifndef CONFIG_EBPF
> > > +virtio_clear_feature(&ret, VIRTIO_NET_F_RSS);
> > > +#endif
> > > +/* for now, there is no solution for populating the hash
> > from eBPF */
> > > +virtio_clear_feature(&ret, VIRTIO_NET_F_HASH_REPORT);
> >
> >
> > I think there's still some misunderstanding here.
> >
> > When "rss" is enabled via command line, qemu cannot turn it off
> > silently, otherwise it may break migration. Instead, qemu should
> > disable
> > vhost-net if eBPF can't be loaded.
> >
> > When "hash_report" is enabled via command line, qemu should disable
> > vhost-net unconditionally.
> >
> >
> > I agree in general with this requirement and I'm preparing an
> > implementation of such fallback.
> >
> > The problem is that qemu already uses the mechanism of turning off
> > host features
> > silently if they are not supported by the current vhost in kernel:
> >
> https://github.com/qemu/qemu/blob/b0f8c22d6d4d07f3bd2307bcc62e1660ef965472/hw/virtio/vhost.c#L1526
> >
> > Can you please comment on it and let me know how it should be modified
> > in future?
> > I've planned to use it in next work (implementing hash report in kernel)
>
>
> This looks like a bug that needs to be solved. Otherwise we break
> migration from rss=on, vhost=off to rss=on,vhost=on.
>
> I think I need to fill the gap in my understanding of migration's
prerequisites.
According to
https://github.com/qemu/qemu/blob/b0f8c22d6d4d07f3bd2307bcc62e1660ef965472/docs/devel/migration.rst
"... QEMU has to be launched with the same arguments the two times ..." and
we test the migration during development
according to this statement.
What are the real requirements and prerequisites of the migration?


> I think you can keep the current code as is and I will try to seek a way
> to solve the issue.
>
> Thanks
>
>

Re: [RFC PATCH v2 4/5] virtio-net: Added eBPF RSS to virtio-net.

2020-11-30 Thread Yuri Benditovich

On Tue, Nov 24, 2020 at 10:49 AM Jason Wang  wrote:

>
> On 2020/11/19 下午7:13, Andrew Melnychenko wrote:
> > From: Andrew 
> >
> > When RSS is enabled the device tries to load the eBPF program
> > to select RX virtqueue in the TUN. If eBPF can be loaded
> > the RSS will function also with vhost (works with kernel 5.8 and later).
> > Software RSS is used as a fallback with vhost=off when eBPF can't be
> loaded
> > or when hash population requested by the guest.
> >
> > Signed-off-by: Yuri Benditovich 
> > Signed-off-by: Andrew Melnychenko 
> > ---
> >   hw/net/vhost_net.c |   2 +
> >   hw/net/virtio-net.c| 120 +++--
> >   include/hw/virtio/virtio-net.h |   4 ++
> >   net/vhost-vdpa.c   |   2 +
> >   4 files changed, 124 insertions(+), 4 deletions(-)
> >
> > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > index 24d555e764..16124f99c3 100644
> > --- a/hw/net/vhost_net.c
> > +++ b/hw/net/vhost_net.c
> > @@ -71,6 +71,8 @@ static const int user_feature_bits[] = {
> >   VIRTIO_NET_F_MTU,
> >   VIRTIO_F_IOMMU_PLATFORM,
> >   VIRTIO_F_RING_PACKED,
> > +VIRTIO_NET_F_RSS,
> > +VIRTIO_NET_F_HASH_REPORT,
> >
> >   /* This bit implies RARP isn't sent by QEMU out of band */
> >   VIRTIO_NET_F_GUEST_ANNOUNCE,
> > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> > index 277289d56e..afcc3032ec 100644
> > --- a/hw/net/virtio-net.c
> > +++ b/hw/net/virtio-net.c
> > @@ -698,6 +698,19 @@ static void virtio_net_set_queues(VirtIONet *n)
> >
> >   static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
> >
> > +static uint64_t fix_ebpf_vhost_features(uint64_t features)
> > +{
> > +/* If vhost=on & CONFIG_EBPF doesn't set - disable RSS feature */
> > +uint64_t ret = features;
> > +#ifndef CONFIG_EBPF
> > +virtio_clear_feature(&ret, VIRTIO_NET_F_RSS);
> > +#endif
> > +/* for now, there is no solution for populating the hash from eBPF
> */
> > +virtio_clear_feature(&ret, VIRTIO_NET_F_HASH_REPORT);
>
>
> I think there's still some misunderstanding here.
>
> When "rss" is enabled via command line, qemu cannot turn it off
> silently, otherwise it may break migration. Instead, qemu should disable
> vhost-net if eBPF can't be loaded.
>
> When "hash_report" is enabled via command line, qemu should disable
> vhost-net unconditionally.
>
>
I agree in general with this requirement and I'm preparing an
implementation of such fallback.

The problem is that qemu already uses the mechanism of turning off host
features
silently if they are not supported by the current vhost in kernel:
https://github.com/qemu/qemu/blob/b0f8c22d6d4d07f3bd2307bcc62e1660ef965472/hw/virtio/vhost.c#L1526

Can you please comment on it and let me know how it should be modified in
future?
I've planned to use it in next work (implementing hash report in kernel)


>
> > +
> > +return ret;
> > +}
> > +
> >   static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t
> features,
> >   Error **errp)
> >   {
> > @@ -732,9 +745,9 @@ static uint64_t virtio_net_get_features(VirtIODevice
> *vdev, uint64_t features,
> >   return features;
> >   }
> >
> > -virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
> > -virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
> > -features = vhost_net_get_features(get_vhost_net(nc->peer),
> features);
> > +features = fix_ebpf_vhost_features(
> > +vhost_net_get_features(get_vhost_net(nc->peer), features));
> > +
> >   vdev->backend_features = features;
> >
> >   if (n->mtu_bypass_backend &&
> > @@ -1169,12 +1182,75 @@ static int virtio_net_handle_announce(VirtIONet
> *n, uint8_t cmd,
> >   }
> >   }
> >
> > +static void virtio_net_unload_epbf_rss(VirtIONet *n);
> > +
> >   static void virtio_net_disable_rss(VirtIONet *n)
> >   {
> >   if (n->rss_data.enabled) {
> >   trace_virtio_net_rss_disable();
> >   }
> >   n->rss_data.enabled = false;
> > +
> > +if (!n->rss_data.enabled_software_rss &&
> ebpf_rss_is_loaded(&n->ebpf_rss)) {
> > +virtio_net_unload_epbf_rss(n);
> > +}
> > +}
> > +
> > +static bool virtio_net_attach_steering_ebpf(NICState *nic, int prog_fd)
> > +{
> > +NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0)

Re: [RFC PATCH v2 0/5] eBPF RSS support for virtio-net

2020-11-26 Thread Yuri Benditovich

On Fri, Nov 27, 2020 at 6:36 AM Jason Wang  wrote:

>
> On 2020/11/26 下午8:52, Yuri Benditovich wrote:
> >
> >
> > On Mon, Nov 23, 2020 at 8:08 AM Jason Wang  > <mailto:jasow...@redhat.com>> wrote:
> >
> >
> > On 2020/11/19 下午7:13, Andrew Melnychenko wrote:
> > > This set of patches introduces the usage of eBPF for packet
> steering
> > > and RSS hash calculation:
> > > * RSS(Receive Side Scaling) is used to distribute network packets
> to
> > > guest virtqueues by calculating packet hash
> > > * Additionally adding support for the usage of RSS with vhost
> > >
> > > The eBPF works on kernels 5.8+
> > > On earlier kernels it fails to load and the RSS feature is reported
> > > only without vhost and implemented in 'in-qemu' software.
> > >
> > > Implementation notes:
> > > Linux TAP TUNSETSTEERINGEBPF ioctl was used to set the eBPF
> program.
> > > Added libbpf dependency and eBPF support.
> > > The eBPF program is part of the qemu and presented as an array
> > > of BPF ELF file data.
> > > The compilation of eBPF is not part of QEMU build and can be done
> > > using provided Makefile.ebpf(need to adjust 'linuxhdrs').
> > > Added changes to virtio-net and vhost, primary eBPF RSS is used.
> > > 'in-qemu' RSS used in the case of hash population and as a
> > fallback option.
> > > For vhost, the hash population feature is not reported to the
> guest.
> > >
> > > Please also see the documentation in PATCH 5/5.
> > >
> > > I am sending those patches as RFC to initiate the discussions
> > and get
> > > feedback on the following points:
> > > * Fallback when eBPF is not supported by the kernel
> > > * Live migration to the kernel that doesn't have eBPF support
> > > * Integration with current QEMU build
> > > * Additional usage for eBPF for packet filtering
> > >
> > > Known issues:
> > > * hash population not supported by eBPF RSS: 'in-qemu' RSS used
> > > as a fallback, also, hash population feature is not reported to
> > guests
> > > with vhost.
> > > * big-endian BPF support: for now, eBPF isn't supported on
> > > big-endian systems. Can be added in future if required.
> > > * huge .h file with eBPF binary. The size of .h file containing
> > > eBPF binary is currently ~5K lines, because the binary is built
> > with debug information.
> > > The binary without debug/BTF info can't be loaded by libbpf.
> > > We're looking for possibilities to reduce the size of the .h files.
> >
> >
> > A question here, is this because the binary file contains DWARF
> > data? If
> > yes, is it a building or loading dependency? If it's latter, maybe we
> > can try to strip them out, anyhow it can't be recognized by kernel.
> >
> > Thanks
> >
> >
> > After some experiments we can see that stripping of debug sections
> > reduces the size of
> > ELF from ~45K to ~20K (we tried to strip more but the libbpf fails to
> > load it, libbpf needs BTF and symbols)
> > So I suggest to reevaluate the necessity of libbpf.
> > For this specific BPF it does not present advantage and we hardly can
> > create some reusable code
> > related to libbpf, i.e. any further BPF will need its own libbpf wrapper.
> > The BTF is really good feature and in case some later BPF will need an
> > access to kernel
> > structures it will use libbpf loader.
> > What do you think about it?
>
>
> If we can find a way to use BTF without libbpf, it should be acceptable.
>
> But the point is that the RSS BPF does not need the BTF as it does not use
any kernel structures.
When we have, for example, filter BPF that will need the BTF - we'll  use
libbpf for it.
Anyway we do not have here any infrastructural code related to libbpf,



> Thanks
>
>
> >
> > >
> > > Changes since v1:
> > > * using libbpf instead of direct 'bpf' system call.
> > > * added libbpf dependency to the configure/meson scripts.
> > > * changed python script for eBPF .h file generation.
> > > * changed eBPF program - reading L3 proto from ethernet frame.
> > > * added TUNSETSTEERINGEBPF define for TUN.
> > > * changed the maintainer's info.
> > > * added license headers.
> > &

Re: [RFC PATCH v2 5/5] docs: Added eBPF documentation.

2020-11-26 Thread Yuri Benditovich

On Tue, Nov 24, 2020 at 10:55 AM Jason Wang  wrote:

>
> On 2020/11/19 下午7:13, Andrew Melnychenko wrote:
> > From: Andrew 
> >
> > Also, added maintainers information.
> >
> > Signed-off-by: Yuri Benditovich 
> > Signed-off-by: Andrew Melnychenko 
> > ---
> >   MAINTAINERS   |   7 +++
> >   docs/ebpf_rss.rst | 133 ++
> >   2 files changed, 140 insertions(+)
> >   create mode 100644 docs/ebpf_rss.rst
> >
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index 2c22bbca5a..d93c85b867 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -3111,6 +3111,13 @@ S: Maintained
> >   F: hw/semihosting/
> >   F: include/hw/semihosting/
> >
> > +EBPF:
> > +M: Jason Wang 
> > +R: Andrew Melnychenko 
> > +R: Yuri Benditovich 
> > +S: Maintained
> > +F: ebpf/*
> > +
> >   Build and test automation
> >   -
> >   Build and test automation
> > diff --git a/docs/ebpf_rss.rst b/docs/ebpf_rss.rst
> > new file mode 100644
> > index 00..f832defdf4
> > --- /dev/null
> > +++ b/docs/ebpf_rss.rst
> > @@ -0,0 +1,133 @@
> > +===
> > +eBPF RSS virtio-net support
> > +===
> > +
> > +RSS(Receive Side Scaling) is used to distribute network packets to
> guest virtqueues
> > +by calculating packet hash. Usually every queue is processed then by a
> specific guest CPU core.
> > +
> > +For now there are 2 RSS implementations in qemu:
> > +- 'in-qemu' RSS (functions if qemu receives network packets, i.e.
> vhost=off)
> > +- eBPF RSS (can also function with vhost=on)
> > +
> > +eBPF support (CONFIG_EBPF) is enabled by 'configure' script.
> > +To enable eBPF RSS support use './configure --enable-bpf'.
> > +
> > +If steering BPF is not set for kernel's TUN module, the TUN uses
> automatic selection
> > +of rx virtqueue based on lookup table built according to calculated
> symmetric hash
> > +of transmitted packets.
> > +If steering BPF is set for TUN the BPF code calculates the hash of
> packet header and
> > +returns the virtqueue number to place the packet to.
> > +
> > +Simplified decision formula:
> > +
> > +.. code:: C
> > +
> > +queue_index = indirection_table[hash(<packet data>)%<indirection_table size>]
> > +
> > +
> > +Not for all packets, the hash can/should be calculated.
> > +
> > +Note: currently, eBPF RSS does not support hash reporting.
> > +
> > +eBPF RSS is turned on by different combinations of vhost-net, virtio-net
> and tap configurations:
> > +
> > +- eBPF is used:
> > +
> > +tap,vhost=off & virtio-net-pci,rss=on,hash=off
> > +
> > +- eBPF is used:
> > +
> > +tap,vhost=on & virtio-net-pci,rss=on,hash=off
> > +
> > +- 'in-qemu' RSS is used:
> > +
> > +tap,vhost=off & virtio-net-pci,rss=on,hash=on
> > +
> > +- eBPF is used, hash population feature is not reported to the guest:
> > +
> > +tap,vhost=on & virtio-net-pci,rss=on,hash=on
> > +
> > +If CONFIG_EBPF is not set then only 'in-qemu' RSS is supported.
> > +Also 'in-qemu' RSS, as a fallback, is used if the eBPF program failed
> to load or set to TUN.
> > +
> > +RSS eBPF program
> > +
> > +
> > +RSS program located in ebpf/tun_rss_steering.h as an array of 'struct
> bpf_insn'.
> > +So the program is part of the qemu binary.
> > +Initially, the eBPF program was compiled by clang and source code
> located at ebpf/rss.bpf.c.
> > +Prerequisites to recompile the eBPF program (regenerate
> ebpf/tun_rss_steering.h):
> > +
> > +llvm, clang, kernel source tree, python3 + (pip3 pyelftools)
> > +Adjust 'linuxhdrs' in Makefile.ebpf to reflect the location of
> the kernel source tree
> > +
> > +$ cd ebpf
> > +$ make -f Makefile.ebpf
> > +
> > +Note the python script for conversion from eBPF ELF object to '.h'
> file - EbpfElf_to_C.py:
> > +
> > +$ python EbpfElf_to_C.py rss.bpf.o tun_rss_steering
> > +
> > +The first argument of the script is ELF object, second - section name
> where the eBPF program located.
> > +The script would generate .h file with eBPF instructions
> and 'relocate array'.
> > +'relocate array' is an array of 'struct fixup_mapfd_t' with the name of
> the eBPF map and instruction offset where the file descriptor of the map
> should be placed.
> &

Re: [RFC PATCH v2 0/5] eBPF RSS support for virtio-net

2020-11-26 Thread Yuri Benditovich

On Mon, Nov 23, 2020 at 8:08 AM Jason Wang  wrote:

>
> On 2020/11/19 下午7:13, Andrew Melnychenko wrote:
> > This set of patches introduces the usage of eBPF for packet steering
> > and RSS hash calculation:
> > * RSS(Receive Side Scaling) is used to distribute network packets to
> > guest virtqueues by calculating packet hash
> > * Additionally adding support for the usage of RSS with vhost
> >
> > The eBPF works on kernels 5.8+
> > On earlier kernels it fails to load and the RSS feature is reported
> > only without vhost and implemented in 'in-qemu' software.
> >
> > Implementation notes:
> > Linux TAP TUNSETSTEERINGEBPF ioctl was used to set the eBPF program.
> > Added libbpf dependency and eBPF support.
> > The eBPF program is part of the qemu and presented as an array
> > of BPF ELF file data.
> > The compilation of eBPF is not part of QEMU build and can be done
> > using provided Makefile.ebpf(need to adjust 'linuxhdrs').
> > Added changes to virtio-net and vhost, primary eBPF RSS is used.
> > 'in-qemu' RSS used in the case of hash population and as a fallback
> option.
> > For vhost, the hash population feature is not reported to the guest.
> >
> > Please also see the documentation in PATCH 5/5.
> >
> > I am sending those patches as RFC to initiate the discussions and get
> > feedback on the following points:
> > * Fallback when eBPF is not supported by the kernel
> > * Live migration to the kernel that doesn't have eBPF support
> > * Integration with current QEMU build
> > * Additional usage for eBPF for packet filtering
> >
> > Known issues:
> > * hash population not supported by eBPF RSS: 'in-qemu' RSS used
> > as a fallback, also, hash population feature is not reported to guests
> > with vhost.
> > * big-endian BPF support: for now, eBPF isn't supported on
> > big-endian systems. Can be added in future if required.
> > * huge .h file with eBPF binary. The size of .h file containing
> > eBPF binary is currently ~5K lines, because the binary is built with
> debug information.
> > The binary without debug/BTF info can't be loaded by libbpf.
> > We're looking for possibilities to reduce the size of the .h files.
>
>
> A question here: is this because the binary file contains DWARF data? If
> yes, is it a build or a load dependency? If it's the latter, maybe we
> can try to strip it out; anyhow, it can't be recognized by the kernel.
>
> Thanks
>
>
After some experiments we can see that stripping the debug sections reduces
the size of the
ELF from ~45K to ~20K (we tried to strip more, but then libbpf fails to load
it; libbpf needs BTF and symbols).
So I suggest re-evaluating the necessity of libbpf.
For this specific BPF program it does not offer an advantage, and we can hardly
create any reusable code
related to libbpf, i.e. any further BPF program will need its own libbpf wrapper.
BTF is a really good feature, and if some later BPF program needs
access to kernel
structures, it can use the libbpf loader.
What do you think about it?


>
> >
> > Changes since v1:
> > * using libbpf instead of direct 'bpf' system call.
> > * added libbpf dependency to the configure/meson scripts.
> > * changed python script for eBPF .h file generation.
> > * changed eBPF program - reading L3 proto from ethernet frame.
> > * added TUNSETSTEERINGEBPF define for TUN.
> > * changed the maintainer's info.
> > * added license headers.
> > * refactored code.
> >
> > Andrew (5):
> >net: Added SetSteeringEBPF method for NetClientState.
> >ebpf: Added eBPF RSS program.
> >ebpf: Added eBPF RSS loader.
> >virtio-net: Added eBPF RSS to virtio-net.
> >docs: Added eBPF RSS documentation.
> >
> >   MAINTAINERS|7 +
> >   configure  |   33 +
> >   docs/ebpf_rss.rst  |  133 +
> >   ebpf/EbpfElf_to_C.py   |   36 +
> >   ebpf/Makefile.ebpf |   33 +
> >   ebpf/ebpf_rss-stub.c   |   40 +
> >   ebpf/ebpf_rss.c|  186 ++
> >   ebpf/ebpf_rss.h|   44 +
> >   ebpf/meson.build   |1 +
> >   ebpf/rss.bpf.c |  505 +++
> >   ebpf/tun_rss_steering.h| 5439 
> >   hw/net/vhost_net.c |2 +
> >   hw/net/virtio-net.c|  120 +-
> >   include/hw/virtio/virtio-net.h |4 +
> >   include/net/net.h  |2 +
> >   meson.build|   11 +
> >   net/tap-bsd.c  |5 +
> >   net/tap-linux.c|   13 +
> >   net/tap-linux.h|1 +
> >   net/tap-solaris.c  |5 +
> >   net/tap-stub.c |5 +
> >   net/tap.c  |9 +
> >   net/tap_int.h  |1 +
> >   net/vhost-vdpa.c   |2 +
> >   24 files changed, 6633 insertions(+), 4 deletions(-)
> >   create mode 100644 docs/ebpf_rss.rst
> >   create mode 100644 ebpf/EbpfElf_to_C.py
> >   create mode 100755 ebpf/Makefile.ebpf
> >   create mode 100644 ebpf/ebpf_rss-stub.c
>

1 2 3 >

1 - 100 of 282 matches

Mail list logo