On 2/23/23 12:26, Chris Mi wrote:
> Initialize psample socket. Add sFlow recv API to receive sampled
> packets from psample socket. Add sFlow recv wait API to add psample
> socket fd to poll list.
> 
> Signed-off-by: Chris Mi <[email protected]>
> Reviewed-by: Roi Dayan <[email protected]>
> ---
>  lib/dpif.h                    |   7 ++
>  lib/netdev-offload-provider.h |  11 ++
>  lib/netdev-offload-tc.c       | 188 ++++++++++++++++++++++++++++++++++
>  3 files changed, 206 insertions(+)
> 
> diff --git a/lib/dpif.h b/lib/dpif.h
> index 6cb4dae6d..95807af8f 100644
> --- a/lib/dpif.h
> +++ b/lib/dpif.h
> @@ -836,6 +836,13 @@ struct dpif_upcall {
>      struct nlattr *userdata;    /* Argument to OVS_ACTION_ATTR_USERSPACE. */
>      struct nlattr *out_tun_key;    /* Output tunnel key. */
>      struct nlattr *actions;    /* Argument to OVS_ACTION_ATTR_USERSPACE. */
> +    /* SFlow offload only.
> +     * When receiving sampled packets from psample socket, there is no
> +     * flow key. But input tunnel and input ifindex are available. They
> +     * are enough to construct flow and continue to process sFlow.
> +     */
> +    struct flow_tnl *in_tun;    /* Input tunnel key. */
> +    uint32_t iifindex;          /* Input ifindex. */

These should be replaced with a struct flow pointer.  See the comments
on patch #6.

And we don't need offload-specific comments.  We just need to allow
datapath implementations to return either a key or a struct flow.

>  };
>  
>  /* A callback to notify higher layer of dpif about to be purged, so that
> diff --git a/lib/netdev-offload-provider.h b/lib/netdev-offload-provider.h
> index 9108856d1..9e2722fd1 100644
> --- a/lib/netdev-offload-provider.h
> +++ b/lib/netdev-offload-provider.h
> @@ -121,6 +121,17 @@ struct netdev_flow_api {
>      int (*meter_del)(ofproto_meter_id meter_id,
>                       struct ofputil_meter_stats *stats);
>  
> +    /* Receives sampled packets in 'buf' from psample socket and fill the
> +     * necessary members in 'upcall'.
> +     * Return 0 if successful, otherwise returns a positive errno value.
> +     */
> +    int (*sflow_recv)(struct dpif_upcall *upcall, struct ofpbuf *buf);
> +
> +    /* Add psample socket fd to poll list. Wake the upcall thread up to
> +     * process it if there is any sampled packets,
> +     */
> +    void (*sflow_recv_wait)(void);

These should be generic callbacks, not tied to psample or sflow.
There might be other reasons offload implementation wants to
send a packet to userspace.  Also, psample doesn't make sense for
other offload implementations.  So, just recv() and recv_wait().


> +
>      /* Initializies the netdev flow api.
>       * Return 0 if successful, otherwise returns a positive errno value. */
>      int (*init_flow_api)(struct netdev *);
> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> index 0dbb7954f..d7901fa68 100644
> --- a/lib/netdev-offload-tc.c
> +++ b/lib/netdev-offload-tc.c
> @@ -18,6 +18,8 @@
>  
>  #include <errno.h>
>  #include <linux/if_ether.h>
> +#include <linux/psample.h>
> +#include <sys/poll.h>

Just <poll.h>, no sys.

>  
>  #include "dpif.h"
>  #include "hash.h"
> @@ -104,6 +106,9 @@ static void parse_tc_flower_to_stats(struct tc_flower *flower,
>  static int get_ufid_adjust_stats(const ovs_u128 *ufid,
>                                   struct dpif_flow_stats *stats);
>  
> +static struct nl_sock *psample_sock;
> +static int psample_family;
> +
>  /* When offloading sample action to TC, userspace creates a unique ID
>   * to map sFlow action and tunnel info and passes this ID to kernel
>   * instead of the sFlow info. Psample will send this ID and sampled
> @@ -151,6 +156,19 @@ sgid_find(uint32_t id)
>      return node ? CONTAINER_OF(node, const struct sgid_node, id_node) : NULL;
>  }
>  
> +static struct offload_sflow *
> +sflow_find(uint32_t id)
> +{
> +    const struct sgid_node *node;
> +
> +    node = sgid_find(id);
> +    if (!node) {
> +        return NULL;
> +    }
> +
> +    return CONST_CAST(struct offload_sflow *, &node->sflow);
> +}
> +
>  static uint32_t
>  dpif_sflow_hash(const struct offload_sflow *sflow)
>  {
> @@ -3015,6 +3033,55 @@ tc_cleanup_policer_actions(struct id_pool *police_ids,
>      hmap_destroy(&map);
>  }
>  
> +static void
> +psample_init(void)
> +{
> +    unsigned int psample_mcgroup;
> +    int err;
> +
> +    if (!netdev_is_flow_api_enabled()) {
> +        VLOG_DBG("Flow API is not enabled.");
> +        return;
> +    }
> +
> +    if (psample_sock) {
> +        VLOG_DBG("Psample socket is already initialized.");
> +        return;
> +    }
> +
> +    err = nl_lookup_genl_family(PSAMPLE_GENL_NAME,
> +                                &psample_family);
> +    if (err) {
> +        VLOG_WARN("Generic Netlink family '%s' does not exist: %s\n"
> +                  "Please make sure the kernel module psample is loaded.",
> +                  PSAMPLE_GENL_NAME, ovs_strerror(err));
> +        return;
> +    }
> +
> +    err = nl_lookup_genl_mcgroup(PSAMPLE_GENL_NAME,
> +                                 PSAMPLE_NL_MCGRP_SAMPLE_NAME,
> +                                 &psample_mcgroup);
> +    if (err) {
> +        VLOG_WARN("Failed to join Netlink multicast group '%s': %s",
> +                  PSAMPLE_NL_MCGRP_SAMPLE_NAME, ovs_strerror(err));
> +        return;
> +    }
> +
> +    err = nl_sock_create(NETLINK_GENERIC, &psample_sock);
> +    if (err) {
> +        VLOG_WARN("Failed to create psample socket: %s", ovs_strerror(err));
> +        return;
> +    }
> +
> +    err = nl_sock_join_mcgroup(psample_sock, psample_mcgroup);
> +    if (err) {
> +        VLOG_WARN("Failed to join psample mcgroup: %s", ovs_strerror(err));
> +        nl_sock_destroy(psample_sock);
> +        psample_sock = NULL;
> +        return;
> +    }
> +}
> +
>  static int
>  netdev_tc_init_flow_api(struct netdev *netdev)
>  {
> @@ -3069,6 +3136,7 @@ netdev_tc_init_flow_api(struct netdev *netdev)
>          meter_police_ids = id_pool_create(METER_POLICE_IDS_BASE,
>                              METER_POLICE_IDS_MAX - METER_POLICE_IDS_BASE + 1);
>          sample_group_ids = id_pool_create(1, UINT32_MAX - 1);
> +        psample_init();

Having the 2 lines above under meter_police_ids_mutex doesn't make
a lot of sense.

>          tc_cleanup_policer_actions(meter_police_ids, METER_POLICE_IDS_BASE,
>                                     METER_POLICE_IDS_MAX);
>          ovs_mutex_unlock(&meter_police_ids_mutex);
> @@ -3288,6 +3356,124 @@ meter_tc_del_policer(ofproto_meter_id meter_id,
>      return err;
>  }
>  
> +struct offload_psample {
> +    struct nlattr *packet; /* Packet data. */
> +    int group_id;          /* Mapping id for sFlow offload. */
> +    int iifindex;          /* Input ifindex. */
> +};
> +
> +static int
> +psample_from_ofpbuf(struct offload_psample *psample,
> +                    struct ofpbuf *buf)
> +{
> +    static const struct nl_policy ovs_psample_policy[] = {
> +        [PSAMPLE_ATTR_IIFINDEX] = { .type = NL_A_U16 },
> +        [PSAMPLE_ATTR_SAMPLE_GROUP] = { .type = NL_A_U32 },
> +        [PSAMPLE_ATTR_GROUP_SEQ] = { .type = NL_A_U32 },
> +        [PSAMPLE_ATTR_DATA] = { .type = NL_A_UNSPEC },
> +    };
> +    struct nlattr *a[ARRAY_SIZE(ovs_psample_policy)];
> +    struct genlmsghdr *genl;
> +    struct nlmsghdr *nlmsg;
> +    struct ofpbuf b;
> +
> +    b = ofpbuf_const_initializer(buf->data, buf->size);
> +    nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
> +    genl = ofpbuf_try_pull(&b, sizeof *genl);
> +    if (!nlmsg || !genl || nlmsg->nlmsg_type != psample_family
> +        || !nl_policy_parse(&b, 0, ovs_psample_policy, a,
> +                            ARRAY_SIZE(ovs_psample_policy))) {
> +        return EINVAL;
> +    }
> +
> +    psample->iifindex = nl_attr_get_u16(a[PSAMPLE_ATTR_IIFINDEX]);
> +    psample->group_id = nl_attr_get_u32(a[PSAMPLE_ATTR_SAMPLE_GROUP]);
> +    psample->packet = a[PSAMPLE_ATTR_DATA];
> +
> +    return 0;
> +}
> +
> +static int
> +psample_parse_packet(struct offload_psample *psample,
> +                     struct offload_sflow *sflow,

Why is 'sflow' an argument here?

> +                     struct dpif_upcall *upcall)
> +{
> +    dp_packet_use_stub(&upcall->packet,
> +                       CONST_CAST(struct nlattr *,
> +                                  nl_attr_get(psample->packet)) - 1,
> +                       nl_attr_get_size(psample->packet) +
> +                       sizeof(struct nlattr));
> +    dp_packet_set_data(&upcall->packet,
> +                       (char *) dp_packet_data(&upcall->packet) +
> +                       sizeof(struct nlattr));
> +    dp_packet_set_size(&upcall->packet, nl_attr_get_size(psample->packet));
> +
> +    sflow = sflow_find(psample->group_id);
> +    if (!sflow) {
> +        return ENOENT;
> +    }
> +
> +    upcall->key = NULL;
> +    upcall->key_len = 0;
> +    upcall->ufid = sflow->ufid;
> +    upcall->userdata = sflow->userdata;
> +    upcall->actions = CONST_CAST(struct nlattr *, sflow->actions);
> +    upcall->in_tun = sflow->tunnel;
> +    upcall->iifindex = psample->iifindex;
> +    upcall->type = DPIF_UC_ACTION;
> +
> +    return 0;
> +}
> +
> +static int
> +sflow_psample_recv(struct dpif_upcall *upcall, struct ofpbuf *buf)

Handler id should be an argument here.  And we should check that
it is zero.  Upper layers do not/should not know how many sockets
we have here.

And 'sflow' should probably not be in the name.

> +{
> +    int read_tries = 0;
> +
> +    if (!psample_sock) {
> +        return ENOENT;
> +    }
> +
> +    for (;;) {
> +        struct offload_psample psample;
> +        struct offload_sflow sflow;
> +        int error;
> +
> +        if (++read_tries > 50) {
> +            return EAGAIN;
> +        }
> +
> +        error = nl_sock_recv(psample_sock, buf, NULL, false);
> +        if (error == ENOBUFS) {
> +            continue;
> +        }
> +
> +        if (error) {
> +            if (error == EAGAIN) {
> +                break;
> +            }
> +            return error;
> +        }
> +
> +        error = psample_from_ofpbuf(&psample, buf);
> +        if (!error) {
> +            return psample_parse_packet(&psample, &sflow, upcall);
> +        } else if (error) {

Condition here is always true.

> +            return error;
> +        }
> +    }
> +
> +    return EAGAIN;
> +}
> +
> +static void
> +sflow_psample_recv_wait(void)

Same here.  Handler id should be an argument, and 'sflow' should
not be in the name.

> +{
> +    if (psample_sock) {
> +        nl_sock_wait(psample_sock, POLLIN);
> +    }
> +}
> +
>  const struct netdev_flow_api netdev_offload_tc = {
>     .type = "linux_tc",
>     .flow_flush = netdev_tc_flow_flush,
> @@ -3301,5 +3487,7 @@ const struct netdev_flow_api netdev_offload_tc = {
>     .meter_set = meter_tc_set_policer,
>     .meter_get = meter_tc_get_policer,
>     .meter_del = meter_tc_del_policer,
> +   .sflow_recv = sflow_psample_recv,
> +   .sflow_recv_wait = sflow_psample_recv_wait,
>     .init_flow_api = netdev_tc_init_flow_api,
>  };

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to