On 1 Mar 2023, at 8:22, Chris Mi wrote: > When offloading sample action to TC, userspace creates a unique ID > to map sFlow action and tunnel info and passes this ID to kernel > instead of the sFlow info. Psample will send this ID and sampled > packet to userspace. Using the ID, userspace can recover the sFlow > info and send sampled packet to the right sFlow monitoring host.
See some comments inline below. //Eelco > Signed-off-by: Chris Mi <c...@nvidia.com> > Reviewed-by: Roi Dayan <r...@nvidia.com> > --- > lib/netdev-offload-tc.c | 233 +++++++++++++++++++++++++++++++++++++++- > lib/tc.h | 1 + > 2 files changed, 232 insertions(+), 2 deletions(-) > > diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c > index 4fb9d9f21..4214337d8 100644 > --- a/lib/netdev-offload-tc.c > +++ b/lib/netdev-offload-tc.c > @@ -41,6 +41,7 @@ > #include "unaligned.h" > #include "util.h" > #include "dpif-provider.h" > +#include "cmap.h" > > VLOG_DEFINE_THIS_MODULE(netdev_offload_tc); > > @@ -103,6 +104,226 @@ static void parse_tc_flower_to_stats(struct tc_flower > *flower, > static int get_ufid_adjust_stats(const ovs_u128 *ufid, > struct dpif_flow_stats *stats); > > +/* When offloading sample action to TC, userspace creates a unique ID > + * to map sFlow action and tunnel info and passes this ID to kernel > + * instead of the sFlow info. Psample will send this ID and sampled > + * packet to userspace. Using the ID, userspace can recover the sFlow > + * info and send sampled packet to the right sFlow monitoring host. > + */ > +struct offload_sflow { Should we maybe already give this a more general name, like offload_sample? This as you need the same structure later when you add support of offload sent to a controller? We might need an offload_type field. > + struct nlattr *action; /* SFlow action. Used in flow_get. */ > + struct nlattr *userdata; /* Struct user_action_cookie. */ > + struct nlattr *actions; /* All actions to get output tunnel. */ You should not need actions at all, see comments later. These are the next actions, which are not of interest to sflow offload. > + struct flow_tnl *tunnel; /* Input tunnel. */ > + ovs_u128 ufid; /* Flow ufid. */ > +}; > + > +/* This maps a psample group ID to struct offload_sflow. */ > +struct sgid_node { > + struct cmap_node metadata_node; > + struct cmap_node id_node; > + struct ovs_refcount refcount; > + uint32_t hash; > + uint32_t id; > + struct offload_sflow sflow; > +}; > + > +static struct ovs_mutex sgid_lock = OVS_MUTEX_INITIALIZER; > +static struct cmap sgid_map = CMAP_INITIALIZER; > +static struct cmap sgid_metadata_map = CMAP_INITIALIZER; > +static struct id_pool *sample_group_ids OVS_GUARDED_BY(sgid_lock); > + > +static void > +sgid_node_free(struct sgid_node *node) > +{ > + if (node) { > + if (node->sflow.tunnel) { This additional null check (and below) is not needed as free() will do this internally. > + free(node->sflow.tunnel); > + } > + if (node->sflow.action) { > + free(node->sflow.action); > + } > + if (node->sflow.actions) { > + free(node->sflow.actions); > + } > + if (node->sflow.userdata) { > + free(node->sflow.userdata); > + } > + free(node); > + } > +} > + > +static struct sgid_node * > +sgid_find(uint32_t id) > +{ > + const struct cmap_node *node = cmap_find(&sgid_map, id); > + > + return node ? CONTAINER_OF(node, struct sgid_node, id_node) : NULL; > +} > + > +static uint32_t > +offload_sflow_hash(const struct offload_sflow *sflow) > +{ > + return hash_bytes(&sflow->ufid, sizeof sflow->ufid, 0); > +} > +static bool > +offload_sflow_equal(const struct offload_sflow *a, > + const struct offload_sflow *b) > +{ > + if (!ovs_u128_equals(a->ufid, b->ufid)) { > + return false; > + } > + /* Action can't be NULL. */ > + if (!a->action || !b->action) { > + return false; > + } > + if (a->action->nla_len != b->action->nla_len > + || memcmp(a->action, b->action, a->action->nla_len)) { > + return false; > + } > + /* Actions is used to get output tunnel. It can be NULL. */ > + if ((a->actions && !b->actions) || (!a->actions && b->actions)) { > + return false; > + } > + if ((a->actions && b->actions && > + a->actions->nla_len != b->actions->nla_len) > + || memcmp(a->actions, b->actions, a->actions->nla_len)) { In theory, a->actions and b->actions can be NULL in this case, if a->actions->nla_len != 0 for some odd reason, it will lead to a crash. > + return false; > + } > + /* Tunnel is used to get input tunnel. It can be NULL. */ > + if (!a->tunnel && !b->tunnel) { > + return true; > + } > + if (!a->tunnel || !b->tunnel) { > + return false; > + } > + return !memcmp(a->tunnel, b->tunnel, sizeof *a->tunnel); > +} > + > +static struct sgid_node * > +sgid_find_equal(const struct offload_sflow *target, uint32_t hash) > +{ > + struct sgid_node *node; > + > + CMAP_FOR_EACH_WITH_HASH (node, metadata_node, hash, &sgid_metadata_map) { > + if (offload_sflow_equal(&node->sflow, target)) { > + return node; > + } > + } > + return NULL; > +} > + > +static struct sgid_node * > +sgid_ref_equal(const struct offload_sflow *target, uint32_t hash) > +{ > + struct sgid_node *node; > + > + do { > + node = sgid_find_equal(target, hash); > + /* Try again if the node was released before we get the reference. */ > + } while (node && !ovs_refcount_try_ref_rcu(&node->refcount)); It might be too late in the afternoon and need some more coffee, but if the node was released, i.e. refcount zero, would it not loop eternally as gid_find_equal is not checking the refcount? > + > + return node; > +} > + > +static void > +offload_sflow_clone(struct offload_sflow *new, const struct offload_sflow > *old) See the earlier comment on the naming, if we re-use this for general psample (for sent to userspace), we might want to make the naming more general. > +{ > + new->action = xmemdup(old->action, old->action->nla_len); > + new->actions = old->actions > + ? xmemdup(old->actions, old->actions->nla_len) > + : NULL; > + new->userdata = xmemdup(old->userdata, old->userdata->nla_len); > + new->tunnel = old->tunnel > + ? xmemdup(old->tunnel, sizeof *old->tunnel) > + : NULL; > + new->ufid = old->ufid; > +} > + > +/* Allocate a unique group id for the given set of flow metadata. The id > + * space is 2^^32 - 1. 0 is reserved. > + */ > +static struct sgid_node * > +sgid_alloc(const struct offload_sflow *sflow, uint32_t hash) > +{ > + struct sgid_node *node = xzalloc(sizeof *node); > + bool ret; > + There is no protection for two threads trying to insert the same sflow structure? Once you determined the node either does not exists or has a ref count of 0 in sgid_alloc_ctx(). > + node->hash = hash; > + ovs_refcount_init(&node->refcount); > + offload_sflow_clone(&node->sflow, sflow); > + > + ovs_mutex_lock(&sgid_lock); > + ret = id_pool_alloc_id(sample_group_ids, &node->id); > + if (!ret) { > + VLOG_ERR("Can't find a free group ID"); I would make this “Can't find a free sample group ID" > + ovs_mutex_unlock(&sgid_lock); > + sgid_node_free(node); > + return NULL; > + } > + > + cmap_insert(&sgid_map, &node->id_node, node->id); > + cmap_insert(&sgid_metadata_map, &node->metadata_node, node->hash); > + ovs_mutex_unlock(&sgid_lock); > + return node; > +} > + > +/* Allocate a unique group id for the given set of flow metadata and > + * optional actions. > + */ So assuming the ufid is part of the offload_sflow data and we only support a single sample per flow we will probably not re-use the metadata. So I guess this only makes sense if we support multiple offloads per flow in the future am I right? > +static uint32_t > +sgid_alloc_ctx(const struct offload_sflow *sflow) > +{ > + uint32_t hash = offload_sflow_hash(sflow); > + struct sgid_node *node = sgid_ref_equal(sflow, hash); > + > + if (!node) { > + node = sgid_alloc(sflow, hash); > + } > + > + return node ? node->id : 0; > +} > + > +static void > +sgid_node_unref(struct sgid_node *node) > + OVS_EXCLUDED(sgid_lock) > +{ > + ovs_mutex_lock(&sgid_lock); > + if (node && ovs_refcount_unref(&node->refcount) == 1) { > + id_pool_free_id(sample_group_ids, node->id); > + cmap_remove(&sgid_metadata_map, &node->metadata_node, node->hash); > + cmap_remove(&sgid_map, &node->id_node, node->id); > + ovsrcu_postpone(sgid_node_free, node); > + } > + ovs_mutex_unlock(&sgid_lock); > +} > + > +static void > +sgid_free(uint32_t id) > +{ > + struct sgid_node *node; > + > + if (!id) { > + return; > + } > + > + node = sgid_find(id); > + if (node) { > + sgid_node_unref(node); > + } else { > + VLOG_ERR("Freeing nonexistent group ID: %"PRIu32, id); "Freeing nonexistent sample group ID:" > + } > +} > + > +static int > +tc_del_flower_filter_and_sgid(struct tcf_id *id) > +{ I do not think sample_group_id should be part of tcf_id, but it should be part of ufid_tc_data, and then it can be cleaned up in the del_ufid_tc_mapping_unlocked() function. If sample_groupd_id != 0. > + sgid_free(id->sample_group_id); > + id->sample_group_id = 0; > + return tc_del_flower_filter(id); > +} > + > static bool > is_internal_port(const char *type) > { > @@ -275,7 +496,7 @@ del_filter_and_ufid_mapping(struct tcf_id *id, const > ovs_u128 *ufid, > } > } > > - err = tc_del_flower_filter(id); > + err = tc_del_flower_filter_and_sgid(id); > if (!err) { > del_ufid_tc_mapping(ufid); > } > @@ -545,7 +766,7 @@ netdev_tc_flow_flush(struct netdev *netdev) > continue; > } > > - err = tc_del_flower_filter(&data->id); > + err = tc_del_flower_filter_and_sgid(&data->id); > if (!err) { > del_ufid_tc_mapping_unlocked(&data->ufid); > } > @@ -2118,6 +2339,11 @@ netdev_tc_parse_nl_actions(struct netdev *netdev, > struct tc_flower *flower, > if (err) { > return err; > } > + } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SAMPLE) { > + struct offload_sflow sflow; > + > + memset(&sflow, 0, sizeof sflow); > + sgid_alloc_ctx(&sflow); > } else { > VLOG_DBG_RL(&rl, "unsupported put action type: %d", > nl_attr_type(nla)); > @@ -2853,6 +3079,9 @@ netdev_tc_init_flow_api(struct netdev *netdev) > tc_cleanup_policer_actions(meter_police_ids, METER_POLICE_IDS_BASE, > METER_POLICE_IDS_MAX); > ovs_mutex_unlock(&meter_police_ids_mutex); > + ovs_mutex_lock(&sgid_lock); > + sample_group_ids = id_pool_create(1, UINT32_MAX - 1); > + ovs_mutex_unlock(&sgid_lock); > > ovsthread_once_done(&once); > } > diff --git a/lib/tc.h b/lib/tc.h > index cdd3b4f60..0d9b1b8e7 100644 > --- a/lib/tc.h > +++ b/lib/tc.h > @@ -312,6 +312,7 @@ struct tcf_id { > uint32_t chain; > uint16_t prio; > uint32_t handle; > + uint32_t sample_group_id; See above, but I think this should be part of struct ufid_to_tc_data. In addition, I think we should make this an array of MAX_TC_SAMPLES_PER_FLOW = 2, so we are ready for supporting sflow + offload to the controller. > }; > > static inline struct tcf_id > -- > 2.26.3 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev