Re: [PATCH net-next v20 05/12] virtio_net: Query and set flow filter caps

Michael S. Tsirkin Sun, 08 Feb 2026 03:51:40 -0800

On Thu, Feb 05, 2026 at 04:47:00PM -0600, Daniel Jurgens wrote:
> When probing a virtnet device, attempt to read the flow filter
> capabilities. In order to use the feature the caps must also
> be set. For now setting what was read is sufficient.
> 
> This patch adds uapi definitions virtio_net flow filters define in
> version 1.4 of the VirtIO spec.
> 
> Signed-off-by: Daniel Jurgens <[email protected]>
> Reviewed-by: Parav Pandit <[email protected]>
> Reviewed-by: Shahar Shitrit <[email protected]>
> 
> ---
> v4:
>     - Validate the length in the selector caps
>     - Removed __free usage.
>     - Removed for(int.
> v5:
>     - Remove unneed () after MAX_SEL_LEN macro (test bot)
> v6:
>     - Fix sparse warning "array of flexible structures" Jakub K/Simon H
>     - Use new variable and validate ff_mask_size before set_cap. MST
> v7:
>     - Set ff->ff_{caps, mask, actions} NULL in error path. Paolo Abeni
>     - Return errors from virtnet_ff_init, -ENOTSUPP is not fatal. Xuan
> 
> v8:
>     - Use real_ff_mask_size when setting the selector caps. Jason Wang
> 
> v9:
>     - Set err after failed memory allocations. Simon Horman
> 
> v10:
>     - Return -EOPNOTSUPP in virnet_ff_init before allocing any memory.
>       Jason/Paolo.
> 
> v11:
>     - Return -EINVAL if any resource limit is 0. Simon Horman
>     - Ensure we don't overrun alloced space of ff->ff_mask by moving the
>       real_ff_mask_size > ff_mask_size check into the loop. Simon Horman
> 
> v12:
>     - Move uapi includes to virtio_net.c vs header file. MST
>     - Remove kernel.h header in virtio_net_ff uapi. MST
>     - WARN_ON_ONCE in error paths validating selectors. MST
>     - Move includes from .h to .c files. MST
>     - Add WARN_ON_ONCE if obj_destroy fails. MST
>     - Comment cleanup in virito_net_ff.h uapi. MST
>     - Add 2 byte pad to the end of virtio_net_ff_cap_data.
>       
> https://lore.kernel.org/virtio-comment/[email protected]/T/#m930988a5d3db316c68546d8b61f4b94f6ebda030
>     - Cleanup and reinit in the freeze/restore path. MST
> 
> v13:
>     - Added /* private: */ comment before reserved field. Jakub
>     - Change ff_mask validation to break at unkonwn selector type. This
>       will allow compatability with newer controllers if the types of
>       selectors is expanded. MST
> 
> v14:
>     - Handle err from virtnet_ff_init in virtnet_restore_up. MST
> 
> v15:
>     - In virtnet_restore_up only call virtnet_close in err path if
>       netif_runnig. AI
> 
> v16:
>     - Return 0 from virtnet_restore_up if virtnet_init_ff return not
>       supported. AI
> 
> v17:
>     - During restore freeze_down on error during ff_init. AI
> 
> v18:
>     - Changed selector cap validation to verify size for each type
>       instead of just checking they weren't bigger than max size. AI
>     - Added __count_by attribute to flexible members in uapi. Paolo A
> 
> v19:
>     - Fixed ;; and incorrect plural in comment. AI
> 
> v20:
>     - include uapi/linux/stddef.h for __counted_by. AI


AI has led you astray, sadly :(




> ---
>  drivers/net/virtio_net.c           | 231 ++++++++++++++++++++++++++++-
>  include/uapi/linux/virtio_net_ff.h |  91 ++++++++++++
>  2 files changed, 321 insertions(+), 1 deletion(-)
>  create mode 100644 include/uapi/linux/virtio_net_ff.h
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index db88dcaefb20..2cfa37e2f83f 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -26,6 +26,11 @@
>  #include <net/netdev_rx_queue.h>
>  #include <net/netdev_queues.h>
>  #include <net/xdp_sock_drv.h>
> +#include <linux/virtio_admin.h>
> +#include <net/ipv6.h>
> +#include <net/ip.h>
> +#include <uapi/linux/virtio_pci.h>
> +#include <uapi/linux/virtio_net_ff.h>
>  
>  static int napi_weight = NAPI_POLL_WEIGHT;
>  module_param(napi_weight, int, 0444);
> @@ -281,6 +286,14 @@ static const struct virtnet_stat_desc 
> virtnet_stats_tx_speed_desc_qstat[] = {
>       VIRTNET_STATS_DESC_TX_QSTAT(speed, ratelimit_packets, 
> hw_drop_ratelimits),
>  };
>  
> +struct virtnet_ff {
> +     struct virtio_device *vdev;
> +     bool ff_supported;
> +     struct virtio_net_ff_cap_data *ff_caps;
> +     struct virtio_net_ff_cap_mask_data *ff_mask;
> +     struct virtio_net_ff_actions *ff_actions;
> +};
> +
>  #define VIRTNET_Q_TYPE_RX 0
>  #define VIRTNET_Q_TYPE_TX 1
>  #define VIRTNET_Q_TYPE_CQ 2
> @@ -488,6 +501,7 @@ struct virtnet_info {
>       TRAILING_OVERLAP(struct virtio_net_rss_config_trailer, rss_trailer, 
> hash_key_data,
>               u8 rss_hash_key_data[VIRTIO_NET_RSS_MAX_KEY_SIZE];
>       );
> +     struct virtnet_ff ff;
>  };
>  static_assert(offsetof(struct virtnet_info, rss_trailer.hash_key_data) ==
>             offsetof(struct virtnet_info, rss_hash_key_data));
> @@ -526,6 +540,7 @@ static struct sk_buff *virtnet_skb_append_frag(struct 
> sk_buff *head_skb,
>                                              struct page *page, void *buf,
>                                              int len, int truesize);
>  static void virtnet_xsk_completed(struct send_queue *sq, int num);
> +static void remove_vq_common(struct virtnet_info *vi);
>  
>  enum virtnet_xmit_type {
>       VIRTNET_XMIT_TYPE_SKB,
> @@ -5684,6 +5699,192 @@ static const struct netdev_stat_ops virtnet_stat_ops 
> = {
>       .get_base_stats         = virtnet_get_base_stats,
>  };
>  
> +static size_t get_mask_size(u16 type)
> +{
> +     switch (type) {
> +     case VIRTIO_NET_FF_MASK_TYPE_ETH:
> +             return sizeof(struct ethhdr);
> +     case VIRTIO_NET_FF_MASK_TYPE_IPV4:
> +             return sizeof(struct iphdr);
> +     case VIRTIO_NET_FF_MASK_TYPE_IPV6:
> +             return sizeof(struct ipv6hdr);
> +     case VIRTIO_NET_FF_MASK_TYPE_TCP:
> +             return sizeof(struct tcphdr);
> +     case VIRTIO_NET_FF_MASK_TYPE_UDP:
> +             return sizeof(struct udphdr);
> +     }
> +
> +     return 0;
> +}
> +
> +static int virtnet_ff_init(struct virtnet_ff *ff, struct virtio_device *vdev)
> +{
> +     size_t ff_mask_size = sizeof(struct virtio_net_ff_cap_mask_data) +
> +                           sizeof(struct virtio_net_ff_selector) *
> +                           VIRTIO_NET_FF_MASK_TYPE_MAX;
> +     struct virtio_admin_cmd_query_cap_id_result *cap_id_list;
> +     struct virtio_net_ff_selector *sel;
> +     unsigned long sel_types = 0;
> +     size_t real_ff_mask_size;
> +     int err;
> +     int i;
> +
> +     if (!vdev->config->admin_cmd_exec)
> +             return -EOPNOTSUPP;
> +
> +     cap_id_list = kzalloc(sizeof(*cap_id_list), GFP_KERNEL);
> +     if (!cap_id_list)
> +             return -ENOMEM;
> +
> +     err = virtio_admin_cap_id_list_query(vdev, cap_id_list);
> +     if (err)
> +             goto err_cap_list;
> +
> +     if (!(VIRTIO_CAP_IN_LIST(cap_id_list,
> +                              VIRTIO_NET_FF_RESOURCE_CAP) &&
> +           VIRTIO_CAP_IN_LIST(cap_id_list,
> +                              VIRTIO_NET_FF_SELECTOR_CAP) &&
> +           VIRTIO_CAP_IN_LIST(cap_id_list,
> +                              VIRTIO_NET_FF_ACTION_CAP))) {
> +             err = -EOPNOTSUPP;
> +             goto err_cap_list;
> +     }
> +
> +     ff->ff_caps = kzalloc(sizeof(*ff->ff_caps), GFP_KERNEL);
> +     if (!ff->ff_caps) {
> +             err = -ENOMEM;
> +             goto err_cap_list;
> +     }
> +
> +     err = virtio_admin_cap_get(vdev,
> +                                VIRTIO_NET_FF_RESOURCE_CAP,
> +                                ff->ff_caps,
> +                                sizeof(*ff->ff_caps));
> +
> +     if (err)
> +             goto err_ff;
> +
> +     if (!ff->ff_caps->groups_limit ||
> +         !ff->ff_caps->classifiers_limit ||
> +         !ff->ff_caps->rules_limit ||
> +         !ff->ff_caps->rules_per_group_limit) {
> +             err = -EINVAL;
> +             goto err_ff;
> +     }
> +
> +     /* VIRTIO_NET_FF_MASK_TYPE start at 1 */
> +     for (i = 1; i <= VIRTIO_NET_FF_MASK_TYPE_MAX; i++)
> +             ff_mask_size += get_mask_size(i);
> +
> +     ff->ff_mask = kzalloc(ff_mask_size, GFP_KERNEL);
> +     if (!ff->ff_mask) {
> +             err = -ENOMEM;
> +             goto err_ff;
> +     }
> +
> +     err = virtio_admin_cap_get(vdev,
> +                                VIRTIO_NET_FF_SELECTOR_CAP,
> +                                ff->ff_mask,
> +                                ff_mask_size);

So ff_mask is from the device, and ff_mask->count does not seem to be checked.

If the device somehow gains a larger mask down the road, can it not then overflow?
Or what if the device is malicious?


> +
> +     if (err)
> +             goto err_ff_mask;
> +
> +     ff->ff_actions = kzalloc(sizeof(*ff->ff_actions) +
> +                                     VIRTIO_NET_FF_ACTION_MAX,
> +                                     GFP_KERNEL);
> +     if (!ff->ff_actions) {
> +             err = -ENOMEM;
> +             goto err_ff_mask;
> +     }
> +
> +     err = virtio_admin_cap_get(vdev,
> +                                VIRTIO_NET_FF_ACTION_CAP,
> +                                ff->ff_actions,
> +                                sizeof(*ff->ff_actions) + 
> VIRTIO_NET_FF_ACTION_MAX);

So ff_actions is from the device, and ff_actions->count is not checked.

If the device gains a ton of actions down the road, can it not then overflow?
Or what if the device is malicious?

> +
> +     if (err)
> +             goto err_ff_action;
> +
> +     err = virtio_admin_cap_set(vdev,
> +                                VIRTIO_NET_FF_RESOURCE_CAP,
> +                                ff->ff_caps,
> +                                sizeof(*ff->ff_caps));
> +     if (err)
> +             goto err_ff_action;
> +
> +     real_ff_mask_size = sizeof(struct virtio_net_ff_cap_mask_data);
> +     sel = (void *)&ff->ff_mask->selectors;
> +
> +     for (i = 0; i < ff->ff_mask->count; i++) {
> +             /* If the selector type is unknown it may indicate the spec
> +              * has been revised to include new types of selectors
> +              */
> +             if (sel->type > VIRTIO_NET_FF_MASK_TYPE_MAX)

Do you want to check for sel->type == 0 too?

> +                     break;

But count remains unchanged? Should we not reduce count here
so the device knows what the driver can drive?


> +
> +             if (sel->length != get_mask_size(sel->type) ||
> +                 test_and_set_bit(sel->type, &sel_types)) {
> +                     WARN_ON_ONCE(true);
> +                     err = -EINVAL;
> +                     goto err_ff_action;
> +             }
> +             real_ff_mask_size += sizeof(struct virtio_net_ff_selector) + 
> sel->length;
> +             if (real_ff_mask_size > ff_mask_size) {
> +                     WARN_ON_ONCE(true);
> +                     err = -EINVAL;
> +                     goto err_ff_action;
> +             }
> +             sel = (void *)sel + sizeof(*sel) + sel->length;
> +     }
> +
> +     err = virtio_admin_cap_set(vdev,
> +                                VIRTIO_NET_FF_SELECTOR_CAP,
> +                                ff->ff_mask,
> +                                real_ff_mask_size);
> +     if (err)
> +             goto err_ff_action;
> +
> +     err = virtio_admin_cap_set(vdev,
> +                                VIRTIO_NET_FF_ACTION_CAP,
> +                                ff->ff_actions,
> +                                sizeof(*ff->ff_actions) + 
> VIRTIO_NET_FF_ACTION_MAX);
> +     if (err)
> +             goto err_ff_action;
> +
> +     ff->vdev = vdev;
> +     ff->ff_supported = true;
> +
> +     kfree(cap_id_list);
> +
> +     return 0;
> +
> +err_ff_action:
> +     kfree(ff->ff_actions);
> +     ff->ff_actions = NULL;
> +err_ff_mask:
> +     kfree(ff->ff_mask);
> +     ff->ff_mask = NULL;
> +err_ff:
> +     kfree(ff->ff_caps);
> +     ff->ff_caps = NULL;
> +err_cap_list:
> +     kfree(cap_id_list);
> +
> +     return err;
> +}
> +
> +static void virtnet_ff_cleanup(struct virtnet_ff *ff)
> +{
> +     if (!ff->ff_supported)
> +             return;
> +
> +     kfree(ff->ff_actions);
> +     kfree(ff->ff_mask);
> +     kfree(ff->ff_caps);
> +     ff->ff_supported = false;
> +}
> +
>  static void virtnet_freeze_down(struct virtio_device *vdev)
>  {
>       struct virtnet_info *vi = vdev->priv;
> @@ -5702,6 +5903,10 @@ static void virtnet_freeze_down(struct virtio_device 
> *vdev)
>       netif_tx_lock_bh(vi->dev);
>       netif_device_detach(vi->dev);
>       netif_tx_unlock_bh(vi->dev);
> +
> +     rtnl_lock();
> +     virtnet_ff_cleanup(&vi->ff);
> +     rtnl_unlock();
>  }
>  
>  static int init_vqs(struct virtnet_info *vi);
> @@ -5727,10 +5932,23 @@ static int virtnet_restore_up(struct virtio_device 
> *vdev)
>                       return err;
>       }
>  
> +     /* Initialize flow filters. Not supported is an acceptable and common
> +      * return code
> +      */
> +     rtnl_lock();
> +     err = virtnet_ff_init(&vi->ff, vi->vdev);
> +     if (err && err != -EOPNOTSUPP) {
> +             rtnl_unlock();
> +             virtnet_freeze_down(vi->vdev);
> +             remove_vq_common(vi);
> +             return err;
> +     }
> +     rtnl_unlock();
> +
>       netif_tx_lock_bh(vi->dev);
>       netif_device_attach(vi->dev);
>       netif_tx_unlock_bh(vi->dev);
> -     return err;
> +     return 0;
>  }
>  
>  static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads)
> @@ -7058,6 +7276,15 @@ static int virtnet_probe(struct virtio_device *vdev)
>       }
>       vi->guest_offloads_capable = vi->guest_offloads;
>  
> +     /* Initialize flow filters. Not supported is an acceptable and common
> +      * return code
> +      */
> +     err = virtnet_ff_init(&vi->ff, vi->vdev);
> +     if (err && err != -EOPNOTSUPP) {
> +             rtnl_unlock();
> +             goto free_unregister_netdev;
> +     }
> +
>       rtnl_unlock();
>  
>       err = virtnet_cpu_notif_add(vi);
> @@ -7073,6 +7300,7 @@ static int virtnet_probe(struct virtio_device *vdev)
>  
>  free_unregister_netdev:
>       unregister_netdev(dev);
> +     virtnet_ff_cleanup(&vi->ff);
>  free_failover:
>       net_failover_destroy(vi->failover);
>  free_vqs:
> @@ -7121,6 +7349,7 @@ static void virtnet_remove(struct virtio_device *vdev)
>       virtnet_free_irq_moder(vi);
>  
>       unregister_netdev(vi->dev);
> +     virtnet_ff_cleanup(&vi->ff);
>  
>       net_failover_destroy(vi->failover);
>  
> diff --git a/include/uapi/linux/virtio_net_ff.h 
> b/include/uapi/linux/virtio_net_ff.h
> new file mode 100644
> index 000000000000..552a6b3a8a91
> --- /dev/null
> +++ b/include/uapi/linux/virtio_net_ff.h
> @@ -0,0 +1,91 @@
> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
> + *
> + * Header file for virtio_net flow filters
> + */
> +#ifndef _LINUX_VIRTIO_NET_FF_H
> +#define _LINUX_VIRTIO_NET_FF_H
> +
> +#include <linux/types.h>
> +#include <uapi/linux/stddef.h>
> +
> +#define VIRTIO_NET_FF_RESOURCE_CAP 0x800
> +#define VIRTIO_NET_FF_SELECTOR_CAP 0x801
> +#define VIRTIO_NET_FF_ACTION_CAP 0x802
> +
> +/**
> + * struct virtio_net_ff_cap_data - Flow filter resource capability limits
> + * @groups_limit: maximum number of flow filter groups supported by the 
> device
> + * @classifiers_limit: maximum number of classifiers supported by the device
> + * @rules_limit: maximum number of rules supported device-wide across all 
> groups
> + * @rules_per_group_limit: maximum number of rules allowed in a single group
> + * @last_rule_priority: priority value associated with the lowest-priority 
> rule
> + * @selectors_per_classifier_limit: maximum selectors allowed in one 
> classifier
> + */
> +struct virtio_net_ff_cap_data {
> +     __le32 groups_limit;
> +     __le32 classifiers_limit;
> +     __le32 rules_limit;
> +     __le32 rules_per_group_limit;
> +     __u8 last_rule_priority;
> +     __u8 selectors_per_classifier_limit;
> +     /* private: */
> +     __u8 reserved[2];
> +};
> +
> +/**
> + * struct virtio_net_ff_selector - Selector mask descriptor
> + * @type: selector type, one of VIRTIO_NET_FF_MASK_TYPE_* constants
> + * @flags: selector flags, see VIRTIO_NET_FF_MASK_F_* constants
> + * @reserved: must be set to 0 by the driver and ignored by the device
> + * @length: size in bytes of @mask
> + * @reserved1: must be set to 0 by the driver and ignored by the device
> + * @mask: variable-length mask payload for @type, length given by @length
> + *
> + * A selector describes a header mask that a classifier can apply. The format
> + * of @mask depends on @type.
> + */
> +struct virtio_net_ff_selector {
> +     __u8 type;
> +     __u8 flags;
> +     __u8 reserved[2];
> +     __u8 length;
> +     __u8 reserved1[3];
> +     __u8 mask[] __counted_by(length);
> +};
> +
> +#define VIRTIO_NET_FF_MASK_TYPE_ETH  1
> +#define VIRTIO_NET_FF_MASK_TYPE_IPV4 2
> +#define VIRTIO_NET_FF_MASK_TYPE_IPV6 3
> +#define VIRTIO_NET_FF_MASK_TYPE_TCP  4
> +#define VIRTIO_NET_FF_MASK_TYPE_UDP  5
> +#define VIRTIO_NET_FF_MASK_TYPE_MAX  VIRTIO_NET_FF_MASK_TYPE_UDP
> +
> +/**
> + * struct virtio_net_ff_cap_mask_data - Supported selector mask formats
> + * @count: number of entries in @selectors
> + * @reserved: must be set to 0 by the driver and ignored by the device
> + * @selectors: packed array of struct virtio_net_ff_selector.
> + */
> +struct virtio_net_ff_cap_mask_data {
> +     __u8 count;
> +     __u8 reserved[7];
> +     __u8 selectors[] __counted_by(count);

This looks wrong to me: count is the number of selectors (packed entries),
not the number of bytes.




> +};
> +
> +#define VIRTIO_NET_FF_MASK_F_PARTIAL_MASK (1 << 0)
> +
> +#define VIRTIO_NET_FF_ACTION_DROP 1
> +#define VIRTIO_NET_FF_ACTION_RX_VQ 2
> +#define VIRTIO_NET_FF_ACTION_MAX  VIRTIO_NET_FF_ACTION_RX_VQ
> +/**
> + * struct virtio_net_ff_actions - Supported flow actions
> + * @count: number of supported actions in @actions
> + * @reserved: must be set to 0 by the driver and ignored by the device
> + * @actions: array of action identifiers (VIRTIO_NET_FF_ACTION_*)
> + */
> +struct virtio_net_ff_actions {
> +     __u8 count;
> +     __u8 reserved[7];
> +     __u8 actions[] __counted_by(count);


this too.

> +};
> +#endif
> -- 
> 2.50.1

Re: [PATCH net-next v20 05/12] virtio_net: Query and set flow filter caps

Reply via email to