On 29 Mar 2023, at 13:42, Chris Mi wrote:

> Initialize psample socket. Add sample recv API to receive sampled
> packets from psample socket. Add sample recv wait API to add psample
> socket fd to poll list.

Thanks for the update, see comments inline.

//Eelco

> Signed-off-by: Chris Mi <[email protected]>
> Reviewed-by: Roi Dayan <[email protected]>
> ---
>  lib/dpif.h                    |   6 +-
>  lib/netdev-offload-provider.h |  17 +++
>  lib/netdev-offload-tc.c       | 192 ++++++++++++++++++++++++++++++++++
>  lib/netdev-offload.c          |   3 +-
>  4 files changed, 215 insertions(+), 3 deletions(-)
>
> diff --git a/lib/dpif.h b/lib/dpif.h
> index 129cbf6a1..5b094ce68 100644
> --- a/lib/dpif.h
> +++ b/lib/dpif.h
> @@ -834,8 +834,10 @@ struct dpif_upcall {
>
>      /* DPIF_UC_ACTION only. */
>      struct nlattr *userdata;    /* Argument to OVS_ACTION_ATTR_USERSPACE. */
> -    struct nlattr *out_tun_key;    /* Output tunnel key. */
> -    struct nlattr *actions;    /* Argument to OVS_ACTION_ATTR_USERSPACE. */
> +    struct nlattr *out_tun_key; /* Output tunnel key. */
> +    struct nlattr *actions;     /* Argument to OVS_ACTION_ATTR_USERSPACE. */
> +    struct flow flow;           /* Caller provide 'flow' if 'key' is not
> +                                   available. */

"Caller provided 'flow' if the 'key' is not available."

Also, multiline comments should start with a *.

>  };
>
>  /* A callback to notify higher layer of dpif about to be purged, so that
> diff --git a/lib/netdev-offload-provider.h b/lib/netdev-offload-provider.h
> index 9108856d1..b3bf433d0 100644
> --- a/lib/netdev-offload-provider.h
> +++ b/lib/netdev-offload-provider.h
> @@ -28,6 +28,8 @@
>  extern "C" {
>  #endif
>
> +struct dpif_upcall;
> +
>  struct netdev_flow_api {
>      char *type;
>      /* Flush all offloaded flows from a netdev.
> @@ -121,6 +123,21 @@ struct netdev_flow_api {
>      int (*meter_del)(ofproto_meter_id meter_id,
>                       struct ofputil_meter_stats *stats);
>
> +    /* Polls for sample offload packets for an upcall handler. If successful,

This should be a general upcall handling API, so I would change this to:

/* Polls for upcall offload packets...

> +     * stores the upcall into '*upcall', using 'buf' for storage.
> +     *

Also missing this part as suggested earlier, as it should also apply to the 
code supplied here (does it?):

   * The implementation should point 'upcall->key' and 'upcall->userdata'
   * (if any) into data in the caller-provided 'buf'.  The implementation may
   * also use 'buf' for storing the data of 'upcall->packet'.  If necessary
   * to make room, the implementation may reallocate the data in 'buf'.
   *
   * The caller owns the data of 'upcall->packet' and may modify it.  If
   * packet's headroom is exhausted as it is manipulated, 'upcall->packet'
   * will be reallocated.  This requires the data of 'upcall->packet' to be
   * released with ofpbuf_uninit() before 'upcall' is destroyed.  However,
   * when an error is returned, the 'upcall->packet' may be uninitialized
   * and should not be released.

> +     * Return 0 if successful, otherwise returns a positive errno value.
> +     *
> +     * This function must not block. If no upcall is pending when it is
> +     * called, it should return EAGAIN without blocking. */
> +    int (*recv)(struct dpif_upcall *upcall, struct ofpbuf *buf,
> +                uint32_t handler_id);
> +
> +    /* Arranges for the poll loop for an upcall handler to wake up when
> +     * sample socket has a message queued to be received with the recv
> +     * member functions. */
> +    void (*recv_wait)(uint32_t handler_id);
> +
>      /* Initializies the netdev flow api.
>       * Return 0 if successful, otherwise returns a positive errno value. */
>      int (*init_flow_api)(struct netdev *);
> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> index c53ea5588..8c8f29af7 100644
> --- a/lib/netdev-offload-tc.c
> +++ b/lib/netdev-offload-tc.c
> @@ -18,6 +18,8 @@
>
>  #include <errno.h>
>  #include <linux/if_ether.h>
> +#include <linux/psample.h>
> +#include <poll.h>
>
>  #include "dpif.h"
>  #include "hash.h"
> @@ -124,6 +126,9 @@ struct sgid_node {
>      struct offload_sample sample;
>  };
>
> +static struct nl_sock *psample_sock;
> +static int psample_family;
> +
>  static struct ovs_mutex sgid_lock = OVS_MUTEX_INITIALIZER;
>  static struct cmap sgid_map = CMAP_INITIALIZER;
>  static struct id_pool *sample_group_ids OVS_GUARDED_BY(sgid_lock);
> @@ -148,6 +153,14 @@ sgid_find(uint32_t id)
>      return node ? CONTAINER_OF(node, struct sgid_node, id_node) : NULL;
>  }
>
> +static struct offload_sample *
> +sample_find(uint32_t id)
> +{
> +    struct sgid_node *node = sgid_find(id);
> +
> +    return node ? &node->sample: NULL;
> +}
> +
>  static void
>  offload_sample_clone(struct offload_sample *new,
>                       const struct offload_sample *old)
> @@ -2931,6 +2944,55 @@ tc_cleanup_policer_actions(struct id_pool *police_ids,
>      hmap_destroy(&map);
>  }
>
> +static void
> +psample_init(void)
> +{
> +    unsigned int psample_mcgroup;
> +    int err;
> +
> +    if (!netdev_is_flow_api_enabled()) {
> +        VLOG_DBG("Flow API is not enabled");
> +        return;
> +    }
> +
> +    if (psample_sock) {
> +        VLOG_DBG("Psample socket is already initialized");
> +        return;
> +    }
> +
> +    err = nl_lookup_genl_family(PSAMPLE_GENL_NAME,
> +                                &psample_family);
> +    if (err) {
> +        VLOG_INFO("Generic Netlink family '%s' does not exist: %s\n"
> +                  "Please make sure the kernel module psample is loaded",
> +                  PSAMPLE_GENL_NAME, ovs_strerror(err));
> +        return;
> +    }
> +
> +    err = nl_lookup_genl_mcgroup(PSAMPLE_GENL_NAME,
> +                                 PSAMPLE_NL_MCGRP_SAMPLE_NAME,
> +                                 &psample_mcgroup);
> +    if (err) {
> +        VLOG_INFO("Failed to join Netlink multicast group '%s': %s",
> +                  PSAMPLE_NL_MCGRP_SAMPLE_NAME, ovs_strerror(err));
> +        return;
> +    }
> +
> +    err = nl_sock_create(NETLINK_GENERIC, &psample_sock);
> +    if (err) {
> +        VLOG_INFO("Failed to create psample socket: %s", ovs_strerror(err));
> +        return;
> +    }
> +
> +    err = nl_sock_join_mcgroup(psample_sock, psample_mcgroup);
> +    if (err) {
> +        VLOG_INFO("Failed to join psample mcgroup: %s", ovs_strerror(err));
> +        nl_sock_destroy(psample_sock);
> +        psample_sock = NULL;
> +        return;
> +    }
> +}
> +
>  static int
>  netdev_tc_init_flow_api(struct netdev *netdev)
>  {
> @@ -2990,6 +3052,7 @@ netdev_tc_init_flow_api(struct netdev *netdev)
>          ovs_mutex_lock(&sgid_lock);
>          sample_group_ids = id_pool_create(1, UINT32_MAX - 1);
>          ovs_mutex_unlock(&sgid_lock);
> +        psample_init();
>
>          ovsthread_once_done(&once);
>      }
> @@ -3206,6 +3269,133 @@ meter_tc_del_policer(ofproto_meter_id meter_id,
>      return err;
>  }
>
> +struct offload_psample {
> +    struct nlattr *packet; /* Packet data. */
> +    uint32_t group_id;     /* Mapping id for sample offload. */
> +    uint16_t iifindex;     /* Input ifindex. */
> +};
> +
> +static int
> +nl_parse_psample(struct offload_psample *psample, struct ofpbuf *buf)
> +{
> +    static const struct nl_policy ovs_psample_policy[] = {
> +        [PSAMPLE_ATTR_IIFINDEX] = { .type = NL_A_U16, .optional = true, },
> +        [PSAMPLE_ATTR_SAMPLE_GROUP] = { .type = NL_A_U32 },
> +        [PSAMPLE_ATTR_GROUP_SEQ] = { .type = NL_A_U32 },
> +        [PSAMPLE_ATTR_DATA] = { .type = NL_A_UNSPEC },
> +    };
> +    struct nlattr *a[ARRAY_SIZE(ovs_psample_policy)];
> +    struct genlmsghdr *genl;
> +    struct nlmsghdr *nlmsg;
> +    struct ofpbuf b;
> +
> +    b = ofpbuf_const_initializer(buf->data, buf->size);
> +    nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
> +    genl = ofpbuf_try_pull(&b, sizeof *genl);
> +    if (!nlmsg || !genl || nlmsg->nlmsg_type != psample_family
> +        || !nl_policy_parse(&b, 0, ovs_psample_policy, a,
> +                            ARRAY_SIZE(ovs_psample_policy))) {
> +        return EINVAL;
> +    }
> +
> +    if (a[PSAMPLE_ATTR_IIFINDEX]) {
> +        psample->iifindex = nl_attr_get_u16(a[PSAMPLE_ATTR_IIFINDEX]);
> +    }

Do } else { psample->iffindex = 0 }, to safe on the memset later.


> +    psample->group_id = nl_attr_get_u32(a[PSAMPLE_ATTR_SAMPLE_GROUP]);
> +    psample->packet = a[PSAMPLE_ATTR_DATA];
> +
> +    return 0;
> +}
> +
> +static int
> +psample_parse_packet(struct offload_psample *psample,
> +                     struct dpif_upcall *upcall)
> +{
> +    struct flow *flow = &upcall->flow;
> +    struct offload_sample *sample;
> +
> +    memset(upcall, 0, sizeof *upcall);
> +    dp_packet_use_const(&upcall->packet,
> +                        nl_attr_get(psample->packet),
> +                        nl_attr_get_size(psample->packet));
> +
> +    sample = sample_find(psample->group_id);
> +    if (!sample) {
> +        VLOG_ERR_RL(&error_rl, "failed to get sample info via group id: %d",

Capital F for failed.

> +                    psample->group_id);
> +        return ENOENT;
> +    }
> +
> +    upcall->userdata = sample->userdata;
> +    if (sample->tunnel) {
> +        memcpy(&flow->tunnel, sample->tunnel, sizeof flow->tunnel);

Are we ok memcpy will work here? We might need to initialize the sample->tunnel 
info correctly in the upcomming patches, see flow_tnl_copy__().

> +    }
> +    if (sample->userspace_actions) {
> +        upcall->actions = sample->userspace_actions;
> +    }
> +    if (psample->iifindex) {
> +        flow->in_port.odp_port = 
> netdev_ifindex_to_odp_port(psample->iifindex);
> +    }

If not provided I think the port should be set to ODPP_NONE (the default 
value), please confirm.
If this is not true the above netdev_ifindex_to_odp_port() should cause this 
function to fail if ODPP_NONE is returned.

> +    upcall->type = DPIF_UC_ACTION;
> +
> +    return 0;
> +}
> +
> +static int
> +netdev_tc_recv(struct dpif_upcall *upcall, struct ofpbuf *buf,
> +               uint32_t handler_id)
> +{
> +    int read_tries = 0;
> +
> +    if (handler_id) {
> +        return EAGAIN;
> +    }
> +
> +    if (!psample_sock) {
> +        return EAGAIN;
> +    }
> +

Combine the two if's in a single one 'if (handler_id || !psample_sock)'

> +    for (;;) {
> +        struct offload_psample psample;
> +        int error;
> +
> +        if (++read_tries > 50) {
> +            return EAGAIN;
> +        }
> +
> +        error = nl_sock_recv(psample_sock, buf, NULL, false);
> +        if (error == ENOBUFS) {
> +            continue;
> +        }
> +
> +        if (error) {
> +            if (error == EAGAIN) {
> +                break;

The outer loop returns EAGAIN, so why explicitly check/break, we can just 
return error, can't we?

> +            }
> +            return error;
> +        }
> +
> +        memset(&psample, 0, sizeof psample);

Remove memset here, see comment in nl_parse_psample.

> +        error = nl_parse_psample(&psample, buf);
> +
> +        return error ? error : psample_parse_packet(&psample, upcall);
> +    }
> +
> +    return EAGAIN;
> +}
> +
> +static void
> +netdev_tc_recv_wait(uint32_t handler_id)
> +{
> +    if (handler_id) {
> +        return;
> +    }
> +
> +    if (psample_sock) {
> +        nl_sock_wait(psample_sock, POLLIN);
> +    }
> +}

Add some comments that netlink will only do this in handler 0 for simplicity. 
Maybe something like:

+static void
+netdev_tc_recv_wait(uint32_t handler_id)
+{
+    /* For simplicity, i.e., using a single NetLink socket, only the first
+     * handler thread will be used. */
+    if (!handler_id && psample_sock) }
+        nl_sock_wait(psample_sock, POLLIN);
+    }
+}

> +
>  const struct netdev_flow_api netdev_offload_tc = {
>     .type = "linux_tc",
>     .flow_flush = netdev_tc_flow_flush,
> @@ -3219,5 +3409,7 @@ const struct netdev_flow_api netdev_offload_tc = {
>     .meter_set = meter_tc_set_policer,
>     .meter_get = meter_tc_get_policer,
>     .meter_del = meter_tc_del_policer,
> +   .recv = netdev_tc_recv,
> +   .recv_wait = netdev_tc_recv_wait,
>     .init_flow_api = netdev_tc_init_flow_api,
>  };
> diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c
> index 4592262bd..84503ec8b 100644
> --- a/lib/netdev-offload.c
> +++ b/lib/netdev-offload.c
> @@ -38,6 +38,7 @@
>  #include "netdev-provider.h"
>  #include "netdev-vport.h"
>  #include "odp-netlink.h"
> +#include "odp-util.h"
>  #include "openflow/openflow.h"
>  #include "packets.h"
>  #include "openvswitch/ofp-print.h"
> @@ -820,7 +821,7 @@ odp_port_t
>  netdev_ifindex_to_odp_port(int ifindex)
>  {
>      struct port_to_netdev_data *data;
> -    odp_port_t ret = 0;
> +    odp_port_t ret = ODPP_NONE;
>
>      ovs_rwlock_rdlock(&netdev_hmap_rwlock);
>      HMAP_FOR_EACH_WITH_HASH (data, ifindex_node, ifindex, &ifindex_to_port) {
> -- 
> 2.26.3

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to