On 2/23/23 12:26, Chris Mi wrote:
> When offloading sample action to TC, userspace creates a unique ID
> to map sFlow action and tunnel info and passes this ID to kernel
> instead of the sFlow info. Psample will send this ID and sampled
> packet to userspace. Using the ID, userspace can recover the sFlow
> info and send sampled packet to the right sFlow monitoring host.
>
> Signed-off-by: Chris Mi <[email protected]>
> Reviewed-by: Roi Dayan <[email protected]>
> ---
> lib/netdev-offload-tc.c | 223 +++++++++++++++++++++++++++++++++++++++-
> lib/tc.h | 1 +
> 2 files changed, 222 insertions(+), 2 deletions(-)
>
> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> index 4fb9d9f21..0dbb7954f 100644
> --- a/lib/netdev-offload-tc.c
> +++ b/lib/netdev-offload-tc.c
> @@ -41,6 +41,7 @@
> #include "unaligned.h"
> #include "util.h"
> #include "dpif-provider.h"
> +#include "cmap.h"
>
> VLOG_DEFINE_THIS_MODULE(netdev_offload_tc);
>
> @@ -103,6 +104,218 @@ static void parse_tc_flower_to_stats(struct tc_flower
> *flower,
> static int get_ufid_adjust_stats(const ovs_u128 *ufid,
> struct dpif_flow_stats *stats);
>
> +/* When offloading sample action to TC, userspace creates a unique ID
> + * to map sFlow action and tunnel info and passes this ID to kernel
> + * instead of the sFlow info. Psample will send this ID and sampled
> + * packet to userspace. Using the ID, userspace can recover the sFlow
> + * info and send sampled packet to the right sFlow monitoring host.
> + */
> +struct offload_sflow {
> + struct nlattr *action; /* SFlow action. Used in flow_get. */
> + struct nlattr *userdata; /* Struct user_action_cookie. */
> + const struct nlattr *actions; /* All actions to get output tunnel. */
> + struct flow_tnl *tunnel; /* Input tunnel. */
> + ovs_u128 ufid; /* Flow ufid. */
> +};
> +
> +/* This maps a psample group ID to struct offload_sflow */
Period at the end of a comment.
> +struct sgid_node {
> + struct cmap_node metadata_node;
> + struct cmap_node id_node;
> + struct ovs_refcount refcount;
> + uint32_t hash;
> + uint32_t id;
> + const struct offload_sflow sflow;
Why this field is marked as const?
> +};
> +
> +static struct ovs_rwlock sgid_rwlock = OVS_RWLOCK_INITIALIZER;
Why it is an rwlock and not just mutex?
It doesn't seem to be used as a read lock.
> +static struct cmap sgid_map = CMAP_INITIALIZER;
> +static struct cmap sgid_metadata_map = CMAP_INITIALIZER;
> +static struct id_pool *sample_group_ids OVS_GUARDED_BY(sgid_rwlock);
> +
> +static void
> +sgid_node_free(struct sgid_node *node)
> +{
free-like functions should handle a NULL pointer.
> + free(node->sflow.tunnel);
> + free(CONST_CAST(void *, node->sflow.action));
> + free(CONST_CAST(void *, node->sflow.actions));
> + free(CONST_CAST(void *, node->sflow.userdata));
> + free(node);
> +}
> +
> +static const struct sgid_node *
> +sgid_find(uint32_t id)
> +{
> + const struct cmap_node *node = cmap_find(&sgid_map, id);
> +
> + return node ? CONTAINER_OF(node, const struct sgid_node, id_node) : NULL;
> +}
> +
> +static uint32_t
> +dpif_sflow_hash(const struct offload_sflow *sflow)
Please, replace the 'dpif' part with an appropriate prefix.
Same for other functions.
> +{
> + return hash_bytes(&sflow->ufid, sizeof sflow->ufid, 0);
> +}
> +
> +static bool
> +dpif_sflow_equal(const struct offload_sflow *a, const struct offload_sflow
> *b)
> +{
> + if (!ovs_u128_equals(a->ufid, b->ufid)) {
> + return false;
> + }
> + /* Action can't be NULL */
Period.
> + if (!a->action || !b->action) {
> + return false;
> + }
> + if (a->action->nla_len != b->action->nla_len
> + || memcmp(a->action, b->action, a->action->nla_len)) {
> + return false;
> + }
> + /* Actions is used to get output tunnel. It can be NULL. */
> + if ((a->actions && !b->actions) || (!a->actions && b->actions)) {
> + return false;
> + }
> + if ((a->actions && b->actions &&
> + a->actions->nla_len != b->actions->nla_len)
> + || memcmp(a->actions, b->actions, a->actions->nla_len)) {
> + return false;
> + }
> + /* Tunnel is used to get input tunnel. It can be NULL. */
> + if (!a->tunnel && !b->tunnel) {
> + return true;
> + }
> + if (!a->tunnel || !b->tunnel) {
> + return false;
> + }
> + return !memcmp(a->tunnel, b->tunnel, sizeof *a->tunnel);
> +}
> +
> +static struct sgid_node *
> +sgid_find_equal(const struct offload_sflow *target, uint32_t hash)
> +{
> + struct sgid_node *node;
> +
> + CMAP_FOR_EACH_WITH_HASH (node, metadata_node, hash, &sgid_metadata_map) {
> + if (dpif_sflow_equal(&node->sflow, target)) {
> + return node;
> + }
> + }
> + return NULL;
> +}
> +
> +static struct sgid_node *
> +sgid_ref_equal(const struct offload_sflow *target, uint32_t hash)
> +{
> + struct sgid_node *node;
> +
> + do {
> + node = sgid_find_equal(target, hash);
> + /* Try again if the node was released before we get the reference. */
> + } while (node && !ovs_refcount_try_ref_rcu(&node->refcount));
> +
> + return node;
> +}
> +
> +static void
> +dpif_sflow_clone(struct offload_sflow *new, const struct offload_sflow *old)
Usually, clone-like functions accept one argument and return
a pointer to the newly allocated copy.
> +{
> + new->action = xmemdup(old->action, old->action->nla_len);
> + new->actions = old->actions
> + ? xmemdup(old->actions, old->actions->nla_len)
> + : NULL;
> + new->userdata = xmemdup(old->userdata, old->userdata->nla_len);
> + new->tunnel = old->tunnel
> + ? xmemdup(old->tunnel, sizeof *old->tunnel)
> + : NULL;
> + new->ufid = old->ufid;
> +}
> +
> +/* Allocate a unique group id for the given set of flow metadata. The id
> + * space is 2^^32 - 1. 0 is reserved.
> + */
> +static struct sgid_node *
> +sgid_alloc__(const struct offload_sflow *sflow, uint32_t hash)
The '__' suffix seems unnecessary.
> +{
> + struct sgid_node *node = xzalloc(sizeof *node);
> + bool ret;
> +
> + node->hash = hash;
> + ovs_refcount_init(&node->refcount);
> + dpif_sflow_clone(CONST_CAST(struct offload_sflow *,
> + &node->sflow), sflow);
> +
> + ovs_rwlock_wrlock(&sgid_rwlock);
> + ret = id_pool_alloc_id(sample_group_ids, &node->id);
> + if (!ret) {
> + VLOG_ERR("Can't find a free group ID");
> + ovs_rwlock_unlock(&sgid_rwlock);
> + sgid_node_free(node);
> + return NULL;
> + }
> +
> + cmap_insert(&sgid_map, &node->id_node, node->id);
> + cmap_insert(&sgid_metadata_map, &node->metadata_node, node->hash);
> + ovs_rwlock_unlock(&sgid_rwlock);
> + return node;
> +}
> +
> +/* Allocate a unique group id for the given set of flow metadata and
> + * optional actions.
> + */
> +static uint32_t
> +sgid_alloc_ctx(const struct offload_sflow *sflow)
> +{
> + uint32_t hash = dpif_sflow_hash(sflow);
> + struct sgid_node *node = sgid_ref_equal(sflow, hash);
> +
> + if (!node) {
> + node = sgid_alloc__(sflow, hash);
> + }
> +
> + return node ? node->id : 0;
> +}
> +
> +static void
> +sgid_node_unref(const struct sgid_node *node_)
unref() function should not have a const argument.
> + OVS_EXCLUDED(sgid_rwlock)
> +{
> + struct sgid_node *node = CONST_CAST(struct sgid_node *, node_);
> +
> + ovs_rwlock_wrlock(&sgid_rwlock);
> + if (node && ovs_refcount_unref(&node->refcount) == 1) {
> + id_pool_free_id(sample_group_ids, node->id);
> + cmap_remove(&sgid_metadata_map, &node->metadata_node, node->hash);
> + cmap_remove(&sgid_map, &node->id_node, node->id);
> + ovsrcu_postpone(sgid_node_free, node);
> + }
> + ovs_rwlock_unlock(&sgid_rwlock);
> +}
> +
> +static void
> +sgid_free(uint32_t id)
> +{
> + const struct sgid_node *node;
> +
> + if (!id) {
> + return;
> + }
> +
> + node = sgid_find(id);
> + if (node) {
> + sgid_node_unref(node);
> + } else {
> + VLOG_ERR("Freeing nonexistent group ID: %"PRIu32, id);
> + }
> +}
> +
> +static int
> +tc_del_flower_filter_and_sgid(struct tcf_id *id)
> +{
> + sgid_free(id->sample_group_id);
> + id->sample_group_id = 0;
> + return tc_del_flower_filter(id);
> +}
> +
> static bool
> is_internal_port(const char *type)
> {
> @@ -275,7 +488,7 @@ del_filter_and_ufid_mapping(struct tcf_id *id, const
> ovs_u128 *ufid,
> }
> }
>
> - err = tc_del_flower_filter(id);
> + err = tc_del_flower_filter_and_sgid(id);
> if (!err) {
> del_ufid_tc_mapping(ufid);
> }
> @@ -545,7 +758,7 @@ netdev_tc_flow_flush(struct netdev *netdev)
> continue;
> }
>
> - err = tc_del_flower_filter(&data->id);
> + err = tc_del_flower_filter_and_sgid(&data->id);
> if (!err) {
> del_ufid_tc_mapping_unlocked(&data->ufid);
> }
> @@ -2118,6 +2331,11 @@ netdev_tc_parse_nl_actions(struct netdev *netdev,
> struct tc_flower *flower,
> if (err) {
> return err;
> }
> + } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SAMPLE) {
> + struct offload_sflow sflow;
> +
> + memset(&sflow, 0, sizeof sflow);
> + sgid_alloc_ctx(&sflow);
> } else {
> VLOG_DBG_RL(&rl, "unsupported put action type: %d",
> nl_attr_type(nla));
> @@ -2850,6 +3068,7 @@ netdev_tc_init_flow_api(struct netdev *netdev)
> ovs_mutex_lock(&meter_police_ids_mutex);
> meter_police_ids = id_pool_create(METER_POLICE_IDS_BASE,
> METER_POLICE_IDS_MAX - METER_POLICE_IDS_BASE +
> 1);
> + sample_group_ids = id_pool_create(1, UINT32_MAX - 1);
> tc_cleanup_policer_actions(meter_police_ids, METER_POLICE_IDS_BASE,
> METER_POLICE_IDS_MAX);
> ovs_mutex_unlock(&meter_police_ids_mutex);
> diff --git a/lib/tc.h b/lib/tc.h
> index cdd3b4f60..0d9b1b8e7 100644
> --- a/lib/tc.h
> +++ b/lib/tc.h
> @@ -312,6 +312,7 @@ struct tcf_id {
> uint32_t chain;
> uint16_t prio;
> uint32_t handle;
> + uint32_t sample_group_id;
> };
>
> static inline struct tcf_id
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev