On 29 Mar 2023, at 13:42, Chris Mi wrote:

> When offloading sample action to TC, userspace creates a unique ID
> to map sample action and tunnel info and passes this ID to kernel
> instead of the sample info. Kernel will send this ID and sampled
> packet to userspace. Using the ID, userspace can recover the sample
> info and send sampled packet to the right sample monitoring host.

Thanks for the update, see comments inline.

//Eelco

> Signed-off-by: Chris Mi <[email protected]>
> Reviewed-by: Roi Dayan <[email protected]>
> ---
>  lib/netdev-offload-tc.c | 119 ++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 119 insertions(+)
>
> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> index 247c1ff8b..c53ea5588 100644
> --- a/lib/netdev-offload-tc.c
> +++ b/lib/netdev-offload-tc.c
> @@ -41,6 +41,7 @@
>  #include "unaligned.h"
>  #include "util.h"
>  #include "dpif-provider.h"
> +#include "cmap.h"

Maybe you can sort the local includes, so it becomes something like this:

#include "cmap.h"
#include "dpif-provider.h"
#include "dpif.h"
#include "hash.h"
#include "id-pool.h"
#include "netdev-linux.h"
#include "netdev-offload-provider.h"
#include "netdev-provider.h"
#include "netdev-vport.h"
#include "netlink-socket.h"
#include "netlink.h"
#include "odp-netlink.h"
#include "odp-util.h"
#include "openvswitch/hmap.h"
#include "openvswitch/match.h"
#include "openvswitch/ofpbuf.h"
#include "openvswitch/thread.h"
#include "openvswitch/types.h"
#include "openvswitch/util.h"
#include "openvswitch/vlog.h"
#include "tc.h"
#include "unaligned.h"
#include "util.h"


>  VLOG_DEFINE_THIS_MODULE(netdev_offload_tc);
>
> @@ -103,6 +104,108 @@ static void parse_tc_flower_to_stats(struct tc_flower 
> *flower,
>  static int get_ufid_adjust_stats(const ovs_u128 *ufid,
>                                   struct dpif_flow_stats *stats);
>
> +/* When offloading sample action to TC, userspace creates a unique ID
> + * to map sample action and tunnel info and passes this ID to kernel
> + * instead of the sample info. Kernel will send this ID and sampled
> + * packet to userspace. Using the ID, userspace can recover the sample
> + * info and send sampled packet to the right sample monitoring host. */
> +struct offload_sample {
> +    uint16_t type;                     /* enum user_action_cookie_type. */
> +    struct nlattr *action;             /* Sample action. Used in flow_get. */
> +    struct nlattr *userdata;           /* Struct user_action_cookie. */
> +    struct nlattr *userspace_actions;  /* All actions to get output tunnel. 
> */
> +    struct flow_tnl *tunnel;           /* Input tunnel. */
> +};
> +
> +/* This maps a sample group ID to struct offload_sample. */
> +struct sgid_node {
> +    struct cmap_node id_node;
> +    uint32_t id;
> +    struct offload_sample sample;
> +};
> +

Add some details on the mutex:

/* The sgid_map mutex protects the sample_group_ids and the sgid_map for
 * cmap_insert(), cmap_remove(), or cmap_replace() operations. */

> +static struct ovs_mutex sgid_lock = OVS_MUTEX_INITIALIZER;
> +static struct cmap sgid_map = CMAP_INITIALIZER;
> +static struct id_pool *sample_group_ids OVS_GUARDED_BY(sgid_lock);
> +
> +static void
> +sgid_node_free(struct sgid_node *node)
> +{
> +    if (node) {
> +        free(node->sample.tunnel);
> +        free(node->sample.action);
> +        free(node->sample.userspace_actions);
> +        free(node->sample.userdata);
> +        free(node);
> +    }
> +}
> +
> +static struct sgid_node *
> +sgid_find(uint32_t id)
> +{
> +    const struct cmap_node *node = cmap_find(&sgid_map, id);
> +
> +    return node ? CONTAINER_OF(node, struct sgid_node, id_node) : NULL;
> +}
> +
> +static void
> +offload_sample_clone(struct offload_sample *new,
> +                     const struct offload_sample *old)

I think we should use src and dst in this, not old and new.

> +{
> +    new->type = old->type;
> +    new->action = xmemdup(old->action, old->action->nla_len);
> +    new->userspace_actions = old->userspace_actions
> +                             ? xmemdup(old->userspace_actions,
> +                                       old->userspace_actions->nla_len)
> +                             : NULL;
> +    new->userdata = xmemdup(old->userdata, old->userdata->nla_len);
> +    new->tunnel = old->tunnel
> +                  ? xmemdup(old->tunnel, sizeof *old->tunnel)
> +                  : NULL;

For all 4 types of xmemdup, we should use a newly to be created 
nullable_xmemdup(), see nullable_xstrdup() how/where to define it.

> +}
> +
> +/* Allocate a unique group id for the given set of flow metadata. The id
> + * space is 2^^32 - 1. 0 is reserved. */
> +static uint32_t
> +sgid_alloc(const struct offload_sample *sample)
> +{
> +    struct sgid_node *node = xzalloc(sizeof *node);
> +
> +    offload_sample_clone(&node->sample, sample);
> +    ovs_mutex_lock(&sgid_lock);
> +    if (!id_pool_alloc_id(sample_group_ids, &node->id)) {
> +        VLOG_ERR("Can't find a free sample group ID");
> +        sgid_node_free(node);
> +        ovs_mutex_unlock(&sgid_lock);
> +        return 0;
> +    }
> +    cmap_insert(&sgid_map, &node->id_node, node->id);
> +    ovs_mutex_unlock(&sgid_lock);
> +
> +    return node ? node->id : 0;

node can never be NULL as xzalloc() will assert, or if it would return 
offload_sample_clone() would SIGNAL.
Maybe you where trying to avoid nested unlocking, which I like, so maybe 
something like:

static uint32_t
sgid_alloc(const struct offload_sample *sample)
{
    struct sgid_node *node = xzalloc(sizeof *node);

    offload_sample_clone(&node->sample, sample);

    ovs_mutex_lock(&sgid_lock);

    if (id_pool_alloc_id(sample_group_ids, &node->id)) {
        cmap_insert(&sgid_map, &node->id_node, node->id);
    } else {
        VLOG_ERR("Can't find a free sample group ID");
        sgid_node_free(node);
        node = NULL;
    }

    ovs_mutex_unlock(&sgid_lock);

    return node ? node->id : 0;
}

> +}
> +
> +static void
> +sgid_free(uint32_t id)
> +{
> +    struct sgid_node *node;
> +
> +    if (!id) {
> +        return;
> +    }
> +
> +    ovs_mutex_lock(&sgid_lock);

I think the mutex can move to the 'if (node)' section as it only needs to 
protect the cmap remove/insert functions.

> +    node = sgid_find(id);
> +    if (node) {
> +        id_pool_free_id(sample_group_ids, node->id);

I think we should not free the id until the ovsrcu_postpone, or else it can 
exist for two instances at the same time. So it should be moved to the 
sgid_node_free() function, i.e.:

 sgid_node_free(struct sgid_node *node)
 {
     if (node) {
+        if (node->id) {
+            ovs_mutex_lock(&sgid_lock);
+            id_pool_free_id(sample_group_ids, node->id);
+            ovs_mutex_unlock(&sgid_lock);
+        }
+

         + ovs_mutex_lock(&sgid_lock);
> +        cmap_remove(&sgid_map, &node->id_node, node->id);
         + ovs_mutex_unlock(&sgid_lock);
> +        ovsrcu_postpone(sgid_node_free, node);
> +    } else {
> +        VLOG_ERR("Freeing nonexistent sample group ID: %"PRIu32, id);
> +    }
> +    ovs_mutex_unlock(&sgid_lock);

Unlock can move up.

> +}
> +
>  static bool
>  is_internal_port(const char *type)
>  {
> @@ -192,12 +295,17 @@ static struct netlink_field set_flower_map[][4] = {
>
>  static struct ovs_mutex ufid_lock = OVS_MUTEX_INITIALIZER;
>
> +#define MAX_TC_SAMPLES_PER_FLOW 1
> +
>  /**
>   * struct ufid_tc_data - data entry for ufid-tc hashmaps.
>   * @ufid_to_tc_node: Element in @ufid_to_tc hash table by ufid key.
>   * @tc_to_ufid_node: Element in @tc_to_ufid hash table by tcf_id key.
>   * @ufid: ufid assigned to the flow
>   * @id: tc filter id (tcf_id)
> + * @sample_group_id: mapping id for sample actions. Currently only support
> +                     sFlow. Use array to support other sample types in the
> +                     future.

How about:

 * @sample_group_id: Mapping id for sample actions. Currently only supports
                     a single sFlow action. Use an array to support multiple
                     similar actions or additional sample types in the future.

>   * @netdev: netdev associated with the tc rule
>   * @adjust_stats: When flow gets updated with new actions, we need to adjust
>   *                the reported stats to include previous values as the 
> hardware
> @@ -208,6 +316,7 @@ struct ufid_tc_data {
>      struct hmap_node tc_to_ufid_node;
>      ovs_u128 ufid;
>      struct tcf_id id;
> +    uint32_t sample_group_id[MAX_TC_SAMPLES_PER_FLOW];
>      struct netdev *netdev;
>      struct dpif_flow_stats adjust_stats;
>  };
> @@ -231,6 +340,8 @@ del_ufid_tc_mapping_unlocked(const ovs_u128 *ufid)
>      hmap_remove(&ufid_to_tc, &data->ufid_to_tc_node);
>      hmap_remove(&tc_to_ufid, &data->tc_to_ufid_node);
>      netdev_close(data->netdev);
> +    sgid_free(data->sample_group_id[0]);
> +    data->sample_group_id[0] = 0;

No need to set it to 0, as we free it below.

>      free(data);
>  }
>
> @@ -2128,6 +2239,11 @@ netdev_tc_parse_nl_actions(struct netdev *netdev, 
> struct tc_flower *flower,
>              if (err) {
>                  return err;
>              }
> +        } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SAMPLE) {
> +            struct offload_sample sample;
> +
> +            memset(&sample, 0, sizeof sample);

We need the  nullable_xmemdup() above, or the below sgid_alloc() would crash.

> +            sgid_alloc(&sample);

Maybe add 'return EOPNOTSUPP;' here because if people are doing a git bisect 
this might result in tests failing.

>          } else {
>              VLOG_DBG_RL(&rl, "unsupported put action type: %d",
>                          nl_attr_type(nla));
> @@ -2871,6 +2987,9 @@ netdev_tc_init_flow_api(struct netdev *netdev)
>          tc_cleanup_policer_actions(meter_police_ids, METER_POLICE_IDS_BASE,
>                                     METER_POLICE_IDS_MAX);
>          ovs_mutex_unlock(&meter_police_ids_mutex);
> +        ovs_mutex_lock(&sgid_lock);
> +        sample_group_ids = id_pool_create(1, UINT32_MAX - 1);
> +        ovs_mutex_unlock(&sgid_lock);
>
>          ovsthread_once_done(&once);
>      }
> -- 
> 2.26.3

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to