On Tue, Sep 23, 2025 at 09:19:15AM -0500, Daniel Jurgens wrote:
> Filtering a flow requires a classifier to match the packets, and a rule
> to filter on the matches.
> 
> A classifier consists of one or more selectors. There is one selector
> per header type. A selector must only use fields set in the selector
> capability. If partial matching is supported, the classifier mask for a
> particular field can be a subset of the mask for that field in the
> capability.
> 
> The rule consists of a priority, an action and a key. The key is a byte
> array containing headers corresponding to the selectors in the
> classifier.
> 
> This patch implements ethtool rules for ethernet headers.
> 
> Example:
> $ ethtool -U ens9 flow-type ether dst 08:11:22:33:44:54 action 30
> Added rule with ID 1
> 
> The rule in the example directs received packets with the specified
> destination MAC address to rq 30.


Since you are adding things to a UAPI header, please document that fact, too.


> Signed-off-by: Daniel Jurgens <[email protected]>
> Reviewed-by: Parav Pandit <[email protected]>
> Reviewed-by: Shahar Shitrit <[email protected]>
> ---
>  drivers/net/virtio_net/virtio_net_ff.c   | 423 +++++++++++++++++++++++
>  drivers/net/virtio_net/virtio_net_ff.h   |  14 +
>  drivers/net/virtio_net/virtio_net_main.c |  16 +
>  include/uapi/linux/virtio_net_ff.h       |  20 ++
>  4 files changed, 473 insertions(+)
> 
> diff --git a/drivers/net/virtio_net/virtio_net_ff.c 
> b/drivers/net/virtio_net/virtio_net_ff.c
> index 0036c2db9f77..e3c34bfd1d55 100644
> --- a/drivers/net/virtio_net/virtio_net_ff.c
> +++ b/drivers/net/virtio_net/virtio_net_ff.c
> @@ -9,6 +9,418 @@
>  #define VIRTNET_FF_ETHTOOL_GROUP_PRIORITY 1
>  #define VIRTNET_FF_MAX_GROUPS 1
>  
> +struct virtnet_ethtool_rule {
> +     struct ethtool_rx_flow_spec flow_spec;
> +     u32 classifier_id;
> +};
> +
> +/* New fields must be added before the classifier struct */
> +struct virtnet_classifier {
> +     size_t size;
> +     u32 id;
> +     struct virtio_net_resource_obj_ff_classifier classifier;
> +};
> +
> +static bool check_mask_vs_cap(const void *m, const void *c,
> +                           u16 len, bool partial)
> +{
> +     const u8 *mask = m;
> +     const u8 *cap = c;
> +     int i;
> +
> +     for (i = 0; i < len; i++) {
> +             if (partial && ((mask[i] & cap[i]) != mask[i]))
> +                     return false;
> +             if (!partial && mask[i] != cap[i])
> +                     return false;
> +     }
> +
> +     return true;
> +}
> +
> +static
> +struct virtio_net_ff_selector *get_selector_cap(const struct virtnet_ff *ff,
> +                                             u8 selector_type)
> +{
> +     struct virtio_net_ff_selector *sel;
> +     u8 *buf;
> +     int i;
> +
> +     buf = (u8 *)&ff->ff_mask->selectors;
> +     sel = (struct virtio_net_ff_selector *)buf;
> +
> +     for (i = 0; i < ff->ff_mask->count; i++) {
> +             if (sel->type == selector_type)
> +                     return sel;
> +
> +             buf += sizeof(struct virtio_net_ff_selector) + sel->length;
> +             sel = (struct virtio_net_ff_selector *)buf;
> +     }
> +
> +     return NULL;
> +}
> +
> +static bool validate_eth_mask(const struct virtnet_ff *ff,
> +                           const struct virtio_net_ff_selector *sel,
> +                           const struct virtio_net_ff_selector *sel_cap)
> +{
> +     bool partial_mask = !!(sel_cap->flags & 
> VIRTIO_NET_FF_MASK_F_PARTIAL_MASK);
> +     struct ethhdr *cap, *mask;
> +     struct ethhdr zeros = {0};
> +
> +     cap = (struct ethhdr *)&sel_cap->mask;
> +     mask = (struct ethhdr *)&sel->mask;
> +
> +     if (memcmp(&zeros.h_dest, mask->h_dest, sizeof(zeros.h_dest)) &&
> +         !check_mask_vs_cap(mask->h_dest, cap->h_dest,
> +                            sizeof(mask->h_dest), partial_mask))
> +             return false;
> +
> +     if (memcmp(&zeros.h_source, mask->h_source, sizeof(zeros.h_source)) &&
> +         !check_mask_vs_cap(mask->h_source, cap->h_source,
> +                            sizeof(mask->h_source), partial_mask))
> +             return false;
> +
> +     if (mask->h_proto &&
> +         !check_mask_vs_cap(&mask->h_proto, &cap->h_proto,
> +                            sizeof(__be16), partial_mask))
> +             return false;
> +
> +     return true;
> +}
> +
> +static bool validate_mask(const struct virtnet_ff *ff,
> +                       const struct virtio_net_ff_selector *sel)
> +{
> +     struct virtio_net_ff_selector *sel_cap = get_selector_cap(ff, 
> sel->type);
> +
> +     if (!sel_cap)
> +             return false;
> +
> +     switch (sel->type) {
> +     case VIRTIO_NET_FF_MASK_TYPE_ETH:
> +             return validate_eth_mask(ff, sel, sel_cap);
> +     }
> +
> +     return false;
> +}
> +
> +static int setup_classifier(struct virtnet_ff *ff, struct virtnet_classifier 
> *c)
> +{
> +     int err;
> +
> +     err = xa_alloc(&ff->classifiers, &c->id, c,
> +                    XA_LIMIT(0, le32_to_cpu(ff->ff_caps->classifiers_limit) 
> - 1),
> +                    GFP_KERNEL);
> +     if (err)
> +             return err;
> +
> +     err = virtio_device_object_create(ff->vdev,
> +                                       VIRTIO_NET_RESOURCE_OBJ_FF_CLASSIFIER,
> +                                       c->id,
> +                                       &c->classifier,
> +                                       c->size);
> +     if (err)
> +             goto err_xarray;
> +
> +     return 0;
> +
> +err_xarray:
> +     xa_erase(&ff->classifiers, c->id);
> +
> +     return err;
> +}
> +
> +static void destroy_classifier(struct virtnet_ff *ff,
> +                            u32 classifier_id)
> +{
> +     struct virtnet_classifier *c;
> +
> +     c = xa_load(&ff->classifiers, classifier_id);
> +     if (c) {
> +             virtio_device_object_destroy(ff->vdev,
> +                                          
> VIRTIO_NET_RESOURCE_OBJ_FF_CLASSIFIER,
> +                                          c->id);
> +
> +             xa_erase(&ff->classifiers, c->id);
> +             kfree(c);
> +     }
> +}
> +
> +static void destroy_ethtool_rule(struct virtnet_ff *ff,
> +                              struct virtnet_ethtool_rule *eth_rule)
> +{
> +     ff->ethtool.num_rules--;
> +
> +     virtio_device_object_destroy(ff->vdev,
> +                                  VIRTIO_NET_RESOURCE_OBJ_FF_RULE,
> +                                  eth_rule->flow_spec.location);
> +
> +     xa_erase(&ff->ethtool.rules, eth_rule->flow_spec.location);
> +     destroy_classifier(ff, eth_rule->classifier_id);
> +     kfree(eth_rule);
> +}
> +
> +static int insert_rule(struct virtnet_ff *ff,
> +                    struct virtnet_ethtool_rule *eth_rule,
> +                    u32 classifier_id,
> +                    const u8 *key,
> +                    size_t key_size)
> +{
> +     struct ethtool_rx_flow_spec *fs = &eth_rule->flow_spec;
> +     struct virtio_net_resource_obj_ff_rule *ff_rule;
> +     int err;
> +
> +     ff_rule = kzalloc(sizeof(*ff_rule) + key_size, GFP_KERNEL);
> +     if (!ff_rule) {
> +             err = -ENOMEM;
> +             goto err_eth_rule;
> +     }
> +     /*
> +      * Intentionally leave the priority as 0. All rules have the same
> +      * priority.
> +      */
> +     ff_rule->group_id = cpu_to_le32(VIRTNET_FF_ETHTOOL_GROUP_PRIORITY);
> +     ff_rule->classifier_id = cpu_to_le32(classifier_id);
> +     ff_rule->key_length = (u8)key_size;

Do we know that key_size is < 256? The (u8) cast would silently truncate anything larger.



> +     ff_rule->action = fs->ring_cookie == RX_CLS_FLOW_DISC ?
> +                                          VIRTIO_NET_FF_ACTION_DROP :
> +                                          VIRTIO_NET_FF_ACTION_RX_VQ;
> +     ff_rule->vq_index = fs->ring_cookie != RX_CLS_FLOW_DISC ?
> +                                            cpu_to_le16(fs->ring_cookie) : 0;
> +     memcpy(&ff_rule->keys, key, key_size);
> +
> +     err = virtio_device_object_create(ff->vdev,
> +                                       VIRTIO_NET_RESOURCE_OBJ_FF_RULE,
> +                                       fs->location,
> +                                       ff_rule,
> +                                       sizeof(*ff_rule) + key_size);
> +     if (err)
> +             goto err_ff_rule;
> +
> +     eth_rule->classifier_id = classifier_id;
> +     ff->ethtool.num_rules++;
> +     kfree(ff_rule);
> +
> +     return 0;
> +
> +err_ff_rule:
> +     kfree(ff_rule);
> +err_eth_rule:
> +     xa_erase(&ff->ethtool.rules, eth_rule->flow_spec.location);
> +     kfree(eth_rule);

This is a weird way to handle errors. This function never allocated eth_rule
or added it to the xarray, so why is it erasing and freeing it here?


Checking callers:

        > +     err = build_and_insert(ff, eth_rule);
        > +     if (err)
        > +             goto err_xa;
        > +
        > +     return err;
        > +
        > +err_xa:
        > +     xa_erase(&ff->ethtool.rules, eth_rule->flow_spec.location);
        > +
        > +err_rule:
        > +     fs->location = RX_CLS_LOC_ANY;
        > +     kfree(eth_rule);

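This looks like a double free to me: insert_rule() has already freed eth_rule
on its error path, and the caller then frees it again.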



> +
> +     return err;
> +}
> +
> +static u32 flow_type_mask(u32 flow_type)
> +{
> +     return flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS);
> +}
> +
> +static bool supported_flow_type(const struct ethtool_rx_flow_spec *fs)
> +{
> +     switch (fs->flow_type) {
> +     case ETHER_FLOW:
> +             return true;
> +     }
> +
> +     return false;
> +}
> +
> +static int validate_flow_input(struct virtnet_ff *ff,
> +                            const struct ethtool_rx_flow_spec *fs,
> +                            u16 curr_queue_pairs)
> +{
> +     /* Force users to use RX_CLS_LOC_ANY - don't allow specific locations */
> +     if (fs->location != RX_CLS_LOC_ANY)
> +             return -EOPNOTSUPP;
> +
> +     if (fs->ring_cookie != RX_CLS_FLOW_DISC &&
> +         fs->ring_cookie >= curr_queue_pairs)
> +             return -EINVAL;
> +
> +     if (fs->flow_type != flow_type_mask(fs->flow_type))
> +             return -EOPNOTSUPP;
> +
> +     if (!supported_flow_type(fs))
> +             return -EOPNOTSUPP;
> +
> +     return 0;
> +}
> +
> +static void calculate_flow_sizes(struct ethtool_rx_flow_spec *fs,
> +                              size_t *key_size, size_t *classifier_size,
> +                              int *num_hdrs)
> +{
> +     *num_hdrs = 1;
> +     *key_size = sizeof(struct ethhdr);
> +     /*
> +      * The classifier size is the size of the classifier header, a selector
> +      * header for each type of header in the match criteria, and each header
> +      * providing the mask for matching against.
> +      */
> +     *classifier_size = *key_size +
> +                        sizeof(struct virtio_net_resource_obj_ff_classifier) 
> +
> +                        sizeof(struct virtio_net_ff_selector) * (*num_hdrs);
> +}
> +
> +static void setup_eth_hdr_key_mask(struct virtio_net_ff_selector *selector,
> +                                u8 *key,
> +                                const struct ethtool_rx_flow_spec *fs)
> +{
> +     struct ethhdr *eth_m = (struct ethhdr *)&selector->mask;
> +     struct ethhdr *eth_k = (struct ethhdr *)key;
> +
> +     selector->type = VIRTIO_NET_FF_MASK_TYPE_ETH;
> +     selector->length = sizeof(struct ethhdr);
> +
> +     memcpy(eth_m, &fs->m_u.ether_spec, sizeof(*eth_m));
> +     memcpy(eth_k, &fs->h_u.ether_spec, sizeof(*eth_k));
> +}
> +
> +static int
> +validate_classifier_selectors(struct virtnet_ff *ff,
> +                           struct virtio_net_resource_obj_ff_classifier 
> *classifier,
> +                           int num_hdrs)
> +{
> +     struct virtio_net_ff_selector *selector = classifier->selectors;
> +
> +     for (int i = 0; i < num_hdrs; i++) {


I'm not sure kernel style allows declaring the loop variable inside the for statement.
I think you should declare it at the beginning of the block.


> +             if (!validate_mask(ff, selector))
> +                     return -EINVAL;
> +
> +             selector = (struct virtio_net_ff_selector *)(((u8 *)selector) +
> +                         sizeof(*selector) + selector->length);
> +     }
> +
> +     return 0;
> +}
> +
> +static int build_and_insert(struct virtnet_ff *ff,
> +                         struct virtnet_ethtool_rule *eth_rule)
> +{
> +     struct virtio_net_resource_obj_ff_classifier *classifier;
> +     struct ethtool_rx_flow_spec *fs = &eth_rule->flow_spec;
> +     struct virtio_net_ff_selector *selector;
> +     struct virtnet_classifier *c;
> +     size_t classifier_size;
> +     size_t key_size;
> +     int num_hdrs;
> +     u8 *key;
> +     int err;
> +
> +     calculate_flow_sizes(fs, &key_size, &classifier_size, &num_hdrs);
> +
> +     key = kzalloc(key_size, GFP_KERNEL);
> +     if (!key)
> +             return -ENOMEM;
> +
> +     /*
> +      * virtio_net_ff_obj_ff_classifier is already included in the
> +      * classifier_size.
> +      */
> +     c = kzalloc(classifier_size +
> +                 sizeof(struct virtnet_classifier) -
> +                 sizeof(struct virtio_net_resource_obj_ff_classifier),

Do we know that all this size arithmetic does not overflow?

> +                 GFP_KERNEL);
> +     if (!c) {
> +             kfree(key);
> +             return -ENOMEM;
> +     }
> +
> +     c->size = classifier_size;
> +     classifier = &c->classifier;
> +     classifier->count = num_hdrs;
> +     selector = &classifier->selectors[0];
> +
> +     setup_eth_hdr_key_mask(selector, key, fs);
> +
> +     err = validate_classifier_selectors(ff, classifier, num_hdrs);
> +     if (err)
> +             goto err_key;
> +
> +     err = setup_classifier(ff, c);
> +     if (err)
> +             goto err_classifier;
> +
> +     err = insert_rule(ff, eth_rule, c->id, key, key_size);
> +     if (err) {
> +             destroy_classifier(ff, c->id);
> +             goto err_key;
> +     }
> +
> +     return 0;
> +
> +err_classifier:
> +     kfree(c);
> +err_key:
> +     kfree(key);
> +
> +     return err;
> +}
> +
> +int virtnet_ethtool_flow_insert(struct virtnet_ff *ff,
> +                             struct ethtool_rx_flow_spec *fs,
> +                             u16 curr_queue_pairs)
> +{
> +     struct virtnet_ethtool_rule *eth_rule;
> +     int err;
> +
> +     if (!ff->ff_supported)
> +             return -EOPNOTSUPP;
> +
> +     err = validate_flow_input(ff, fs, curr_queue_pairs);
> +     if (err)
> +             return err;
> +
> +     eth_rule = kzalloc(sizeof(*eth_rule), GFP_KERNEL);
> +     if (!eth_rule)
> +             return -ENOMEM;
> +
> +     err = xa_alloc(&ff->ethtool.rules, &fs->location, eth_rule,
> +                    XA_LIMIT(0, le32_to_cpu(ff->ff_caps->rules_limit) - 1),
> +                    GFP_KERNEL);
> +     if (err)
> +             goto err_rule;
> +
> +     eth_rule->flow_spec = *fs;
> +
> +     err = build_and_insert(ff, eth_rule);
> +     if (err)
> +             goto err_xa;
> +
> +     return err;
> +
> +err_xa:
> +     xa_erase(&ff->ethtool.rules, eth_rule->flow_spec.location);
> +
> +err_rule:
> +     fs->location = RX_CLS_LOC_ANY;
> +     kfree(eth_rule);
> +
> +     return err;
> +}
> +
> +int virtnet_ethtool_flow_remove(struct virtnet_ff *ff, int location)
> +{
> +     struct virtnet_ethtool_rule *eth_rule;
> +     int err = 0;
> +
> +     if (!ff->ff_supported)
> +             return -EOPNOTSUPP;
> +
> +     eth_rule = xa_load(&ff->ethtool.rules, location);
> +     if (!eth_rule) {
> +             err = -ENOENT;
> +             goto out;
> +     }
> +
> +     destroy_ethtool_rule(ff, eth_rule);
> +out:
> +     return err;
> +}
> +
>  static size_t get_mask_size(u16 type)
>  {
>       switch (type) {
> @@ -142,6 +554,8 @@ void virtnet_ff_init(struct virtnet_ff *ff, struct 
> virtio_device *vdev)
>       if (err)
>               goto err_ff_action;
>  
> +     xa_init_flags(&ff->classifiers, XA_FLAGS_ALLOC);
> +     xa_init_flags(&ff->ethtool.rules, XA_FLAGS_ALLOC);
>       ff->vdev = vdev;
>       ff->ff_supported = true;
>  
> @@ -157,9 +571,18 @@ void virtnet_ff_init(struct virtnet_ff *ff, struct 
> virtio_device *vdev)
>  
>  void virtnet_ff_cleanup(struct virtnet_ff *ff)
>  {
> +     struct virtnet_ethtool_rule *eth_rule;
> +     unsigned long i;
> +
>       if (!ff->ff_supported)
>               return;
>  
> +     xa_for_each(&ff->ethtool.rules, i, eth_rule)
> +             destroy_ethtool_rule(ff, eth_rule);
> +
> +     xa_destroy(&ff->ethtool.rules);
> +     xa_destroy(&ff->classifiers);
> +
>       virtio_device_object_destroy(ff->vdev,
>                                    VIRTIO_NET_RESOURCE_OBJ_FF_GROUP,
>                                    VIRTNET_FF_ETHTOOL_GROUP_PRIORITY);
> diff --git a/drivers/net/virtio_net/virtio_net_ff.h 
> b/drivers/net/virtio_net/virtio_net_ff.h
> index 4aac0bd08b63..94b575fbd9ed 100644
> --- a/drivers/net/virtio_net/virtio_net_ff.h
> +++ b/drivers/net/virtio_net/virtio_net_ff.h
> @@ -3,20 +3,34 @@
>   * Header file for virtio_net flow filters
>   */
>  #include <linux/virtio_admin.h>
> +#include <uapi/linux/ethtool.h>
>  
>  #ifndef _VIRTIO_NET_FF_H
>  #define _VIRTIO_NET_FF_H
>  
> +struct virtnet_ethtool_ff {
> +     struct xarray rules;
> +     int    num_rules;
> +};
> +
>  struct virtnet_ff {
>       struct virtio_device *vdev;
>       bool ff_supported;
>       struct virtio_net_ff_cap_data *ff_caps;
>       struct virtio_net_ff_cap_mask_data *ff_mask;
>       struct virtio_net_ff_actions *ff_actions;
> +     struct xarray classifiers;
> +     int num_classifiers;
> +     struct virtnet_ethtool_ff ethtool;
>  };
>  
>  void virtnet_ff_init(struct virtnet_ff *ff, struct virtio_device *vdev);
>  
>  void virtnet_ff_cleanup(struct virtnet_ff *ff);
>  
> +int virtnet_ethtool_flow_insert(struct virtnet_ff *ff,
> +                             struct ethtool_rx_flow_spec *fs,
> +                             u16 curr_queue_pairs);
> +int virtnet_ethtool_flow_remove(struct virtnet_ff *ff, int location);
> +
>  #endif /* _VIRTIO_NET_FF_H */
> diff --git a/drivers/net/virtio_net/virtio_net_main.c 
> b/drivers/net/virtio_net/virtio_net_main.c
> index ebf3e5db0d64..808988cdf265 100644
> --- a/drivers/net/virtio_net/virtio_net_main.c
> +++ b/drivers/net/virtio_net/virtio_net_main.c
> @@ -5619,6 +5619,21 @@ static u32 virtnet_get_rx_ring_count(struct net_device 
> *dev)
>       return vi->curr_queue_pairs;
>  }
>  
> +static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc 
> *info)
> +{
> +     struct virtnet_info *vi = netdev_priv(dev);
> +
> +     switch (info->cmd) {
> +     case ETHTOOL_SRXCLSRLINS:
> +             return virtnet_ethtool_flow_insert(&vi->ff, &info->fs,
> +                                                vi->curr_queue_pairs);
> +     case ETHTOOL_SRXCLSRLDEL:
> +             return virtnet_ethtool_flow_remove(&vi->ff, info->fs.location);
> +     }
> +
> +     return -EOPNOTSUPP;
> +}
> +
>  static const struct ethtool_ops virtnet_ethtool_ops = {
>       .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES |
>               ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX,
> @@ -5645,6 +5660,7 @@ static const struct ethtool_ops virtnet_ethtool_ops = {
>       .get_rxfh_fields = virtnet_get_hashflow,
>       .set_rxfh_fields = virtnet_set_hashflow,
>       .get_rx_ring_count = virtnet_get_rx_ring_count,
> +     .set_rxnfc = virtnet_set_rxnfc,
>  };
>  
>  static void virtnet_get_queue_stats_rx(struct net_device *dev, int i,
> diff --git a/include/uapi/linux/virtio_net_ff.h 
> b/include/uapi/linux/virtio_net_ff.h
> index 662693e1fefd..f258964322f4 100644
> --- a/include/uapi/linux/virtio_net_ff.h
> +++ b/include/uapi/linux/virtio_net_ff.h
> @@ -13,6 +13,8 @@
>  #define VIRTIO_NET_FF_ACTION_CAP 0x802
>  
>  #define VIRTIO_NET_RESOURCE_OBJ_FF_GROUP 0x0200
> +#define VIRTIO_NET_RESOURCE_OBJ_FF_CLASSIFIER 0x0201
> +#define VIRTIO_NET_RESOURCE_OBJ_FF_RULE 0x0202
>  
>  struct virtio_net_ff_cap_data {
>       __le32 groups_limit;
> @@ -59,4 +61,22 @@ struct virtio_net_resource_obj_ff_group {
>       __le16 group_priority;
>  };
>  
> +struct virtio_net_resource_obj_ff_classifier {
> +     __u8 count;
> +     __u8 reserved[7];
> +     struct virtio_net_ff_selector selectors[];
> +};
> +
> +struct virtio_net_resource_obj_ff_rule {
> +     __le32 group_id;
> +     __le32 classifier_id;
> +     __u8 rule_priority;
> +     __u8 key_length; /* length of key in bytes */
> +     __u8 action;
> +     __u8 reserved;
> +     __le16 vq_index;
> +     __u8 reserved1[2];
> +     __u8 keys[];
> +};
> +
>  #endif
> -- 
> 2.45.0


Reply via email to