On 29 Mar 2023, at 13:42, Chris Mi wrote: > Initialize psample socket. Add sample recv API to receive sampled > packets from psample socket. Add sample recv wait API to add psample > socket fd to poll list.
Thanks for the update, see comments inline. //Eelco > Signed-off-by: Chris Mi <[email protected]> > Reviewed-by: Roi Dayan <[email protected]> > --- > lib/dpif.h | 6 +- > lib/netdev-offload-provider.h | 17 +++ > lib/netdev-offload-tc.c | 192 ++++++++++++++++++++++++++++++++++ > lib/netdev-offload.c | 3 +- > 4 files changed, 215 insertions(+), 3 deletions(-) > > diff --git a/lib/dpif.h b/lib/dpif.h > index 129cbf6a1..5b094ce68 100644 > --- a/lib/dpif.h > +++ b/lib/dpif.h > @@ -834,8 +834,10 @@ struct dpif_upcall { > > /* DPIF_UC_ACTION only. */ > struct nlattr *userdata; /* Argument to OVS_ACTION_ATTR_USERSPACE. */ > - struct nlattr *out_tun_key; /* Output tunnel key. */ > - struct nlattr *actions; /* Argument to OVS_ACTION_ATTR_USERSPACE. */ > + struct nlattr *out_tun_key; /* Output tunnel key. */ > + struct nlattr *actions; /* Argument to OVS_ACTION_ATTR_USERSPACE. */ > + struct flow flow; /* Caller provide 'flow' if 'key' is not > + available. */ "Caller provided 'flow' if the 'key' is not available." Also, multiline comments should start with a *. > }; > > /* A callback to notify higher layer of dpif about to be purged, so that > diff --git a/lib/netdev-offload-provider.h b/lib/netdev-offload-provider.h > index 9108856d1..b3bf433d0 100644 > --- a/lib/netdev-offload-provider.h > +++ b/lib/netdev-offload-provider.h > @@ -28,6 +28,8 @@ > extern "C" { > #endif > > +struct dpif_upcall; > + > struct netdev_flow_api { > char *type; > /* Flush all offloaded flows from a netdev. > @@ -121,6 +123,21 @@ struct netdev_flow_api { > int (*meter_del)(ofproto_meter_id meter_id, > struct ofputil_meter_stats *stats); > > + /* Polls for sample offload packets for an upcall handler. If successful, This should be a general upcall handling API, so I would change this to: /* Polls for upcall offload packets... > + * stores the upcall into '*upcall', using 'buf' for storage. > + * Also missing this part as suggested earlier, as it should also apply to the code supplied here (does it?): * The implementation should point 'upcall->key' and 'upcall->userdata' * (if any) into data in the caller-provided 'buf'. The implementation may * also use 'buf' for storing the data of 'upcall->packet'. If necessary * to make room, the implementation may reallocate the data in 'buf'. * * The caller owns the data of 'upcall->packet' and may modify it. If * packet's headroom is exhausted as it is manipulated, 'upcall->packet' * will be reallocated. This requires the data of 'upcall->packet' to be * released with ofpbuf_uninit() before 'upcall' is destroyed. However, * when an error is returned, the 'upcall->packet' may be uninitialized * and should not be released. > + * Return 0 if successful, otherwise returns a positive errno value. > + * > + * This function must not block. If no upcall is pending when it is > + * called, it should return EAGAIN without blocking. */ > + int (*recv)(struct dpif_upcall *upcall, struct ofpbuf *buf, > + uint32_t handler_id); > + > + /* Arranges for the poll loop for an upcall handler to wake up when > + * sample socket has a message queued to be received with the recv > + * member functions. */ > + void (*recv_wait)(uint32_t handler_id); > + > /* Initializies the netdev flow api. > * Return 0 if successful, otherwise returns a positive errno value. */ > int (*init_flow_api)(struct netdev *); > diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c > index c53ea5588..8c8f29af7 100644 > --- a/lib/netdev-offload-tc.c > +++ b/lib/netdev-offload-tc.c > @@ -18,6 +18,8 @@ > > #include <errno.h> > #include <linux/if_ether.h> > +#include <linux/psample.h> > +#include <poll.h> > > #include "dpif.h" > #include "hash.h" > @@ -124,6 +126,9 @@ struct sgid_node { > struct offload_sample sample; > }; > > +static struct nl_sock *psample_sock; > +static int psample_family; > + > static struct ovs_mutex sgid_lock = OVS_MUTEX_INITIALIZER; > static struct cmap sgid_map = CMAP_INITIALIZER; > static struct id_pool *sample_group_ids OVS_GUARDED_BY(sgid_lock); > @@ -148,6 +153,14 @@ sgid_find(uint32_t id) > return node ? CONTAINER_OF(node, struct sgid_node, id_node) : NULL; > } > > +static struct offload_sample * > +sample_find(uint32_t id) > +{ > + struct sgid_node *node = sgid_find(id); > + > + return node ? &node->sample: NULL; > +} > + > static void > offload_sample_clone(struct offload_sample *new, > const struct offload_sample *old) > @@ -2931,6 +2944,55 @@ tc_cleanup_policer_actions(struct id_pool *police_ids, > hmap_destroy(&map); > } > > +static void > +psample_init(void) > +{ > + unsigned int psample_mcgroup; > + int err; > + > + if (!netdev_is_flow_api_enabled()) { > + VLOG_DBG("Flow API is not enabled"); > + return; > + } > + > + if (psample_sock) { > + VLOG_DBG("Psample socket is already initialized"); > + return; > + } > + > + err = nl_lookup_genl_family(PSAMPLE_GENL_NAME, > + &psample_family); > + if (err) { > + VLOG_INFO("Generic Netlink family '%s' does not exist: %s\n" > + "Please make sure the kernel module psample is loaded", > + PSAMPLE_GENL_NAME, ovs_strerror(err)); > + return; > + } > + > + err = nl_lookup_genl_mcgroup(PSAMPLE_GENL_NAME, > + PSAMPLE_NL_MCGRP_SAMPLE_NAME, > + &psample_mcgroup); > + if (err) { > + VLOG_INFO("Failed to join Netlink multicast group '%s': %s", > + PSAMPLE_NL_MCGRP_SAMPLE_NAME, ovs_strerror(err)); > + return; > + } > + > + err = nl_sock_create(NETLINK_GENERIC, &psample_sock); > + if (err) { > + VLOG_INFO("Failed to create psample socket: %s", ovs_strerror(err)); > + return; > + } > + > + err = nl_sock_join_mcgroup(psample_sock, psample_mcgroup); > + if (err) { > + VLOG_INFO("Failed to join psample mcgroup: %s", ovs_strerror(err)); > + nl_sock_destroy(psample_sock); > + psample_sock = NULL; > + return; > + } > +} > + > static int > netdev_tc_init_flow_api(struct netdev *netdev) > { > @@ -2990,6 +3052,7 @@ netdev_tc_init_flow_api(struct netdev *netdev) > ovs_mutex_lock(&sgid_lock); > sample_group_ids = id_pool_create(1, UINT32_MAX - 1); > ovs_mutex_unlock(&sgid_lock); > + psample_init(); > > ovsthread_once_done(&once); > } > @@ -3206,6 +3269,133 @@ meter_tc_del_policer(ofproto_meter_id meter_id, > return err; > } > > +struct offload_psample { > + struct nlattr *packet; /* Packet data. */ > + uint32_t group_id; /* Mapping id for sample offload. */ > + uint16_t iifindex; /* Input ifindex. */ > +}; > + > +static int > +nl_parse_psample(struct offload_psample *psample, struct ofpbuf *buf) > +{ > + static const struct nl_policy ovs_psample_policy[] = { > + [PSAMPLE_ATTR_IIFINDEX] = { .type = NL_A_U16, .optional = true, }, > + [PSAMPLE_ATTR_SAMPLE_GROUP] = { .type = NL_A_U32 }, > + [PSAMPLE_ATTR_GROUP_SEQ] = { .type = NL_A_U32 }, > + [PSAMPLE_ATTR_DATA] = { .type = NL_A_UNSPEC }, > + }; > + struct nlattr *a[ARRAY_SIZE(ovs_psample_policy)]; > + struct genlmsghdr *genl; > + struct nlmsghdr *nlmsg; > + struct ofpbuf b; > + > + b = ofpbuf_const_initializer(buf->data, buf->size); > + nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg); > + genl = ofpbuf_try_pull(&b, sizeof *genl); > + if (!nlmsg || !genl || nlmsg->nlmsg_type != psample_family > + || !nl_policy_parse(&b, 0, ovs_psample_policy, a, > + ARRAY_SIZE(ovs_psample_policy))) { > + return EINVAL; > + } > + > + if (a[PSAMPLE_ATTR_IIFINDEX]) { > + psample->iifindex = nl_attr_get_u16(a[PSAMPLE_ATTR_IIFINDEX]); > + } Do } else { psample->iffindex = 0 }, to safe on the memset later. > + psample->group_id = nl_attr_get_u32(a[PSAMPLE_ATTR_SAMPLE_GROUP]); > + psample->packet = a[PSAMPLE_ATTR_DATA]; > + > + return 0; > +} > + > +static int > +psample_parse_packet(struct offload_psample *psample, > + struct dpif_upcall *upcall) > +{ > + struct flow *flow = &upcall->flow; > + struct offload_sample *sample; > + > + memset(upcall, 0, sizeof *upcall); > + dp_packet_use_const(&upcall->packet, > + nl_attr_get(psample->packet), > + nl_attr_get_size(psample->packet)); > + > + sample = sample_find(psample->group_id); > + if (!sample) { > + VLOG_ERR_RL(&error_rl, "failed to get sample info via group id: %d", Capital F for failed. > + psample->group_id); > + return ENOENT; > + } > + > + upcall->userdata = sample->userdata; > + if (sample->tunnel) { > + memcpy(&flow->tunnel, sample->tunnel, sizeof flow->tunnel); Are we ok memcpy will work here? We might need to initialize the sample->tunnel info correctly in the upcomming patches, see flow_tnl_copy__(). > + } > + if (sample->userspace_actions) { > + upcall->actions = sample->userspace_actions; > + } > + if (psample->iifindex) { > + flow->in_port.odp_port = > netdev_ifindex_to_odp_port(psample->iifindex); > + } If not provided I think the port should be set to ODPP_NONE (the default value), please confirm. If this is not true the above netdev_ifindex_to_odp_port() should cause this function to fail if ODPP_NONE is returned. > + upcall->type = DPIF_UC_ACTION; > + > + return 0; > +} > + > +static int > +netdev_tc_recv(struct dpif_upcall *upcall, struct ofpbuf *buf, > + uint32_t handler_id) > +{ > + int read_tries = 0; > + > + if (handler_id) { > + return EAGAIN; > + } > + > + if (!psample_sock) { > + return EAGAIN; > + } > + Combine the two if's in a single one 'if (handler_id || !psample_sock)' > + for (;;) { > + struct offload_psample psample; > + int error; > + > + if (++read_tries > 50) { > + return EAGAIN; > + } > + > + error = nl_sock_recv(psample_sock, buf, NULL, false); > + if (error == ENOBUFS) { > + continue; > + } > + > + if (error) { > + if (error == EAGAIN) { > + break; The outer loop returns EAGAIN, so why explicitly check/break, we can just return error, can't we? > + } > + return error; > + } > + > + memset(&psample, 0, sizeof psample); Remove memset here, see comment in nl_parse_psample. > + error = nl_parse_psample(&psample, buf); > + > + return error ? error : psample_parse_packet(&psample, upcall); > + } > + > + return EAGAIN; > +} > + > +static void > +netdev_tc_recv_wait(uint32_t handler_id) > +{ > + if (handler_id) { > + return; > + } > + > + if (psample_sock) { > + nl_sock_wait(psample_sock, POLLIN); > + } > +} Add some comments that netlink will only do this in handler 0 for simplicity. Maybe something like: +static void +netdev_tc_recv_wait(uint32_t handler_id) +{ + /* For simplicity, i.e., using a single NetLink socket, only the first + * handler thread will be used. */ + if (!handler_id && psample_sock) } + nl_sock_wait(psample_sock, POLLIN); + } +} > + > const struct netdev_flow_api netdev_offload_tc = { > .type = "linux_tc", > .flow_flush = netdev_tc_flow_flush, > @@ -3219,5 +3409,7 @@ const struct netdev_flow_api netdev_offload_tc = { > .meter_set = meter_tc_set_policer, > .meter_get = meter_tc_get_policer, > .meter_del = meter_tc_del_policer, > + .recv = netdev_tc_recv, > + .recv_wait = netdev_tc_recv_wait, > .init_flow_api = netdev_tc_init_flow_api, > }; > diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c > index 4592262bd..84503ec8b 100644 > --- a/lib/netdev-offload.c > +++ b/lib/netdev-offload.c > @@ -38,6 +38,7 @@ > #include "netdev-provider.h" > #include "netdev-vport.h" > #include "odp-netlink.h" > +#include "odp-util.h" > #include "openflow/openflow.h" > #include "packets.h" > #include "openvswitch/ofp-print.h" > @@ -820,7 +821,7 @@ odp_port_t > netdev_ifindex_to_odp_port(int ifindex) > { > struct port_to_netdev_data *data; > - odp_port_t ret = 0; > + odp_port_t ret = ODPP_NONE; > > ovs_rwlock_rdlock(&netdev_hmap_rwlock); > HMAP_FOR_EACH_WITH_HASH (data, ifindex_node, ifindex, &ifindex_to_port) { > -- > 2.26.3 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
