On 6/24/2023 4:17 AM, Ilya Maximets wrote:
On 6/19/23 07:05, Chris Mi wrote:
Initialize psample socket. Add sample recv API to receive sampled
packets from psample socket. Add sample recv wait API to add psample
socket fd to poll list.

Signed-off-by: Chris Mi <[email protected]>
Reviewed-by: Roi Dayan <[email protected]>
---
  lib/dpif.h                    |   6 +-
  lib/flow.h                    |   2 +-
  lib/netdev-offload-provider.h |  30 ++++++
  lib/netdev-offload-tc.c       | 172 ++++++++++++++++++++++++++++++++++
  lib/netdev-offload.c          |   3 +-
  lib/packets.h                 |   2 +-
  6 files changed, 210 insertions(+), 5 deletions(-)

diff --git a/lib/dpif.h b/lib/dpif.h
index 129cbf6a1..f91295862 100644
--- a/lib/dpif.h
+++ b/lib/dpif.h
@@ -834,8 +834,10 @@ struct dpif_upcall {
/* DPIF_UC_ACTION only. */
      struct nlattr *userdata;    /* Argument to OVS_ACTION_ATTR_USERSPACE. */
-    struct nlattr *out_tun_key;    /* Output tunnel key. */
-    struct nlattr *actions;    /* Argument to OVS_ACTION_ATTR_USERSPACE. */
+    struct nlattr *out_tun_key; /* Output tunnel key. */
+    struct nlattr *actions;     /* Argument to OVS_ACTION_ATTR_USERSPACE. */
+    struct flow flow;           /* Caller provided 'flow' if the 'key' is not
+                                   available. */
  };
/* A callback to notify higher layer of dpif about to be purged, so that
diff --git a/lib/flow.h b/lib/flow.h
index a9d026e1c..0974bfd42 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -970,7 +970,7 @@ pkt_metadata_from_flow(struct pkt_metadata *md, const struct flow *flow)
md->recirc_id = flow->recirc_id;
      md->dp_hash = flow->dp_hash;
-    flow_tnl_copy__(&md->tunnel, &flow->tunnel);
+    flow_tnl_copy(&md->tunnel, &flow->tunnel);
      md->skb_priority = flow->skb_priority;
      md->pkt_mark = flow->pkt_mark;
      md->in_port = flow->in_port;
diff --git a/lib/netdev-offload-provider.h b/lib/netdev-offload-provider.h
index 9108856d1..a457556e5 100644
--- a/lib/netdev-offload-provider.h
+++ b/lib/netdev-offload-provider.h
@@ -28,6 +28,8 @@
  extern "C" {
  #endif
+struct dpif_upcall;
+
  struct netdev_flow_api {
      char *type;
      /* Flush all offloaded flows from a netdev.
@@ -121,6 +123,34 @@ struct netdev_flow_api {
      int (*meter_del)(ofproto_meter_id meter_id,
                       struct ofputil_meter_stats *stats);
+ /* Polls for upcall offload packets for an upcall handler. If successful,
+     * stores the upcall into '*upcall', using 'buf' for storage.
+     *
+     * The implementation should point 'upcall->flow' and 'upcall->userdata'
+     * (if any) into data in the caller-provided 'buf'.  The implementation may

This is not really correct.  The 'flow' is not a pointer in this patch set,
AFAICT, so we can't point it to buf.

I changed it to '&upcall->flow'.


+     * also use 'buf' for storing the data of 'upcall->packet'.  If necessary
+     * to make room, the implementation may reallocate the data in 'buf'.
+     *
+     * The caller owns the data of 'upcall->packet' and may modify it.  If
+     * packet's headroom is exhausted as it is manipulated, 'upcall->packet'
+     * will be reallocated.  This requires the data of 'upcall->packet' to be
+     * released with ofpbuf_uninit() before 'upcall' is destroyed.  However,
+     * when an error is returned, the 'upcall->packet' may be uninitialized
+     * and should not be released.
+     *
+     * This function must not block. If no upcall is pending when it is
+     * called, it should return EAGAIN without blocking.
+     *
+     * Return 0 if successful, otherwise returns a positive errno value.
+     */
+    int (*recv)(struct dpif_upcall *upcall, struct ofpbuf *buf,
+                uint32_t handler_id);
+
+    /* Arranges for the poll loop for an upcall handler to wake up when
+     * sample socket has a message queued to be received with the recv

s/sample socket/offload provider/

Done.


+     * member functions. */
+    void (*recv_wait)(uint32_t handler_id);
+
      /* Initializies the netdev flow api.
       * Return 0 if successful, otherwise returns a positive errno value. */
      int (*init_flow_api)(struct netdev *);
diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
index 56726acf8..8d571aca8 100644
--- a/lib/netdev-offload-tc.c
+++ b/lib/netdev-offload-tc.c
@@ -18,6 +18,8 @@
#include <errno.h>
  #include <linux/if_ether.h>
+#include <linux/psample.h>
+#include <poll.h>
#include "cmap.h"
  #include "dpif-provider.h"
@@ -125,6 +127,9 @@ struct sgid_node {
      struct offload_sample sample;
  };
+static struct nl_sock *psample_sock;
+static int psample_family;
+
  /* The sgid_map mutex protects the sample_group_ids and the sgid_map for
   * cmap_insert(), cmap_remove(), or cmap_replace() operations. */
  static struct ovs_mutex sgid_lock = OVS_MUTEX_INITIALIZER;
@@ -156,6 +161,14 @@ sgid_find(uint32_t id)
      return node ? CONTAINER_OF(node, struct sgid_node, id_node) : NULL;
  }
+static struct offload_sample *
+sample_find(uint32_t id)
+{
+    struct sgid_node *node = sgid_find(id);
+
+    return node ? &node->sample: NULL;
+}
+
  static void
  offload_sample_clone(struct offload_sample *dst,
                       const struct offload_sample *src,
@@ -2955,6 +2968,55 @@ tc_cleanup_policer_actions(struct id_pool *police_ids,
      hmap_destroy(&map);
  }
+static void
+psample_init(void)
+{
+    unsigned int psample_mcgroup;
+    int err;
+
+    if (!netdev_is_flow_api_enabled()) {
+        VLOG_DBG("Flow API is not enabled");
+        return;
+    }
+
+    if (psample_sock) {
+        VLOG_DBG("Psample socket is already initialized");
+        return;
+    }
+
+    err = nl_lookup_genl_family(PSAMPLE_GENL_NAME,
+                                &psample_family);
+    if (err) {
+        VLOG_INFO("Generic Netlink family '%s' does not exist: %s\n"
+                  "Please make sure the kernel module psample is loaded",
+                  PSAMPLE_GENL_NAME, ovs_strerror(err));
+        return;
+    }
+
+    err = nl_lookup_genl_mcgroup(PSAMPLE_GENL_NAME,
+                                 PSAMPLE_NL_MCGRP_SAMPLE_NAME,
+                                 &psample_mcgroup);
+    if (err) {
+        VLOG_INFO("Failed to join Netlink multicast group '%s': %s",
+                  PSAMPLE_NL_MCGRP_SAMPLE_NAME, ovs_strerror(err));
+        return;
+    }
+
+    err = nl_sock_create(NETLINK_GENERIC, &psample_sock);
+    if (err) {
+        VLOG_INFO("Failed to create psample socket: %s", ovs_strerror(err));
+        return;
+    }
+
+    err = nl_sock_join_mcgroup(psample_sock, psample_mcgroup);
+    if (err) {
+        VLOG_INFO("Failed to join psample mcgroup: %s", ovs_strerror(err));
+        nl_sock_destroy(psample_sock);
+        psample_sock = NULL;
+        return;
+    }
+}
+
  static int
  netdev_tc_init_flow_api(struct netdev *netdev)
  {
@@ -3014,6 +3076,7 @@ netdev_tc_init_flow_api(struct netdev *netdev)
          ovs_mutex_lock(&sgid_lock);
          sample_group_ids = id_pool_create(1, UINT32_MAX - 1);
          ovs_mutex_unlock(&sgid_lock);
+        psample_init();
ovsthread_once_done(&once);
      }
@@ -3231,6 +3294,113 @@ meter_tc_del_policer(ofproto_meter_id meter_id,
      return err;
  }
+struct offload_psample {
+    struct nlattr *packet; /* Packet data. */
+    uint32_t group_id;     /* Mapping id for sample offload. */
+};
+
+static int
+nl_parse_psample(struct offload_psample *psample, struct ofpbuf *buf)
+{
+    static const struct nl_policy ovs_psample_policy[] = {
+        [PSAMPLE_ATTR_SAMPLE_GROUP] = { .type = NL_A_U32 },
+        [PSAMPLE_ATTR_DATA] = { .type = NL_A_UNSPEC },
+    };
+    struct nlattr *a[ARRAY_SIZE(ovs_psample_policy)];
+    struct genlmsghdr *genl;
+    struct nlmsghdr *nlmsg;
+    struct ofpbuf b;
+
+    b = ofpbuf_const_initializer(buf->data, buf->size);
+    nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
+    genl = ofpbuf_try_pull(&b, sizeof *genl);
+    if (!nlmsg || !genl || nlmsg->nlmsg_type != psample_family
+        || !nl_policy_parse(&b, 0, ovs_psample_policy, a,
+                            ARRAY_SIZE(ovs_psample_policy))) {
+        return EINVAL;
+    }
+
+    psample->group_id = nl_attr_get_u32(a[PSAMPLE_ATTR_SAMPLE_GROUP]);
+    psample->packet = a[PSAMPLE_ATTR_DATA];
+
+    return 0;
+}
+
+static int
+psample_parse_packet(struct offload_psample *psample,
+                     struct dpif_upcall *upcall)
+{
+    struct flow *flow = &upcall->flow;
+    struct offload_sample *sample;
+
+    memset(upcall, 0, sizeof *upcall);
+    dp_packet_use_const(&upcall->packet,
+                        nl_attr_get(psample->packet),
+                        nl_attr_get_size(psample->packet));
+
+    sample = sample_find(psample->group_id);
+    if (!sample) {
+        VLOG_ERR_RL(&error_rl, "Failed to get sample info via group id: %d",
+                    psample->group_id);
+        return ENOENT;
+    }
+
+    upcall->userdata = sample->userdata;
+    if (sample->tunnel) {
+        flow_tnl_copy(&flow->tunnel, sample->tunnel);
+    }
+    if (sample->userspace_actions) {
+        upcall->actions = sample->userspace_actions;
+    }
+    flow->in_port.odp_port = netdev_ifindex_to_odp_port(sample->ifindex);
+    upcall->type = DPIF_UC_ACTION;
+
+    return 0;
+}
+
+static int
+netdev_tc_recv(struct dpif_upcall *upcall, struct ofpbuf *buf,
+               uint32_t handler_id)
+{
+    int read_tries = 0;
+
+    if (handler_id || !psample_sock) {
+        return EAGAIN;
+    }
+
+    for (;;) {
+        struct offload_psample psample;
+        int error;
+
+        if (++read_tries > 50) {
+            return EAGAIN;
+        }
+
+        error = nl_sock_recv(psample_sock, buf, NULL, false);
+        if (error == ENOBUFS) {
+            continue;
+        }
+        if (error) {
+            return error;
+        }
+        error = nl_parse_psample(&psample, buf);
+
+        return error ? error : psample_parse_packet(&psample, upcall);
+    }
+
+    return EAGAIN;
+}
+
+static void
+netdev_tc_recv_wait(uint32_t handler_id)
+{
+    /* For simplicity, i.e., using a single NetLink socket, only the first
+     * handler thread will be used. */
+    if (!handler_id && psample_sock) {
+        nl_sock_wait(psample_sock, POLLIN);

This should be:

     poll_fd_wait(nl_sock_fd(psample_sock), POLLIN);

The difference is the source locator.  If we call poll_fd_wait() from here,
the wake-up notifications will correctly point to netdev_tc_recv_wait(); if we
go through nl_sock_wait() instead, they will only carry a generic
nl_sock_wait() location.


Done.

+    }
+}
+
  const struct netdev_flow_api netdev_offload_tc = {
     .type = "linux_tc",
     .flow_flush = netdev_tc_flow_flush,
@@ -3244,5 +3414,7 @@ const struct netdev_flow_api netdev_offload_tc = {
     .meter_set = meter_tc_set_policer,
     .meter_get = meter_tc_get_policer,
     .meter_del = meter_tc_del_policer,
+   .recv = netdev_tc_recv,
+   .recv_wait = netdev_tc_recv_wait,
     .init_flow_api = netdev_tc_init_flow_api,
  };
diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c
index a5fa62487..403315deb 100644
--- a/lib/netdev-offload.c
+++ b/lib/netdev-offload.c
@@ -38,6 +38,7 @@
  #include "netdev-provider.h"
  #include "netdev-vport.h"
  #include "odp-netlink.h"
+#include "odp-util.h"
  #include "openflow/openflow.h"
  #include "packets.h"
  #include "openvswitch/ofp-print.h"
@@ -826,7 +827,7 @@ odp_port_t
  netdev_ifindex_to_odp_port(int ifindex)
  {
      struct port_to_netdev_data *data;
-    odp_port_t ret = 0;
+    odp_port_t ret = ODPP_NONE;
ovs_rwlock_rdlock(&ifindex_to_port_rwlock);
      HMAP_FOR_EACH_WITH_HASH (data, ifindex_node, ifindex, &ifindex_to_port) {
diff --git a/lib/packets.h b/lib/packets.h
index ac4c28e47..f49c3822f 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -86,7 +86,7 @@ flow_tnl_size(const struct flow_tnl *src)
   * data in 'dst' is NOT cleared, so this must not be used in cases where the
   * uninitialized portion may be hashed over. */
  static inline void
-flow_tnl_copy__(struct flow_tnl *dst, const struct flow_tnl *src)
+flow_tnl_copy(struct flow_tnl *dst, const struct flow_tnl *src)
  {
      memcpy(dst, src, flow_tnl_size(src));
  }

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to