Initialize psample socket. Add sFlow recv API to receive sampled packets from psample socket. Add sFlow recv wait API to add psample socket fd to poll list.
Signed-off-by: Chris Mi <[email protected]> Reviewed-by: Roi Dayan <[email protected]> --- lib/dpif.h | 7 ++ lib/netdev-offload-provider.h | 11 ++ lib/netdev-offload-tc.c | 188 ++++++++++++++++++++++++++++++++++ 3 files changed, 206 insertions(+) diff --git a/lib/dpif.h b/lib/dpif.h index 6cb4dae6d..95807af8f 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -836,6 +836,13 @@ struct dpif_upcall { struct nlattr *userdata; /* Argument to OVS_ACTION_ATTR_USERSPACE. */ struct nlattr *out_tun_key; /* Output tunnel key. */ struct nlattr *actions; /* Argument to OVS_ACTION_ATTR_USERSPACE. */ + /* sFlow offload only. + * When receiving sampled packets from psample socket, there is no + * flow key. But input tunnel and input ifindex are available. They + * are enough to construct flow and continue to process sFlow. + */ + struct flow_tnl *in_tun; /* Input tunnel key. */ + uint32_t iifindex; /* Input ifindex. */ }; /* A callback to notify higher layer of dpif about to be purged, so that diff --git a/lib/netdev-offload-provider.h b/lib/netdev-offload-provider.h index 9108856d1..9e2722fd1 100644 --- a/lib/netdev-offload-provider.h +++ b/lib/netdev-offload-provider.h @@ -121,6 +121,17 @@ struct netdev_flow_api { int (*meter_del)(ofproto_meter_id meter_id, struct ofputil_meter_stats *stats); + /* Receives sampled packets in 'buf' from psample socket and fills the + * necessary members in 'upcall'. + * Returns 0 if successful, otherwise a positive errno value. + */ + int (*sflow_recv)(struct dpif_upcall *upcall, struct ofpbuf *buf); + + /* Adds psample socket fd to poll list. Wakes the upcall thread up to + * process it if there are any sampled packets. + */ + void (*sflow_recv_wait)(void); + /* Initializies the netdev flow api. * Return 0 if successful, otherwise returns a positive errno value. 
*/ int (*init_flow_api)(struct netdev *); diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c index 0dbb7954f..d7901fa68 100644 --- a/lib/netdev-offload-tc.c +++ b/lib/netdev-offload-tc.c @@ -18,6 +18,8 @@ #include <errno.h> #include <linux/if_ether.h> +#include <linux/psample.h> +#include <sys/poll.h> #include "dpif.h" #include "hash.h" @@ -104,6 +106,9 @@ static void parse_tc_flower_to_stats(struct tc_flower *flower, static int get_ufid_adjust_stats(const ovs_u128 *ufid, struct dpif_flow_stats *stats); +static struct nl_sock *psample_sock; +static int psample_family; + /* When offloading sample action to TC, userspace creates a unique ID * to map sFlow action and tunnel info and passes this ID to kernel * instead of the sFlow info. Psample will send this ID and sampled @@ -151,6 +156,19 @@ sgid_find(uint32_t id) return node ? CONTAINER_OF(node, const struct sgid_node, id_node) : NULL; } +static struct offload_sflow * +sflow_find(uint32_t id) +{ + const struct sgid_node *node; + + node = sgid_find(id); + if (!node) { + return NULL; + } + + return CONST_CAST(struct offload_sflow *, &node->sflow); +} + static uint32_t dpif_sflow_hash(const struct offload_sflow *sflow) { @@ -3015,6 +3033,55 @@ tc_cleanup_policer_actions(struct id_pool *police_ids, hmap_destroy(&map); } +static void +psample_init(void) +{ + unsigned int psample_mcgroup; + int err; + + if (!netdev_is_flow_api_enabled()) { + VLOG_DBG("Flow API is not enabled."); + return; + } + + if (psample_sock) { + VLOG_DBG("Psample socket is already initialized."); + return; + } + + err = nl_lookup_genl_family(PSAMPLE_GENL_NAME, + &psample_family); + if (err) { + VLOG_WARN("Generic Netlink family '%s' does not exist: %s\n" + "Please make sure the kernel module psample is loaded.", + PSAMPLE_GENL_NAME, ovs_strerror(err)); + return; + } + + err = nl_lookup_genl_mcgroup(PSAMPLE_GENL_NAME, + PSAMPLE_NL_MCGRP_SAMPLE_NAME, + &psample_mcgroup); + if (err) { + VLOG_WARN("Failed to join Netlink multicast group 
'%s': %s", + PSAMPLE_NL_MCGRP_SAMPLE_NAME, ovs_strerror(err)); + return; + } + + err = nl_sock_create(NETLINK_GENERIC, &psample_sock); + if (err) { + VLOG_WARN("Failed to create psample socket: %s", ovs_strerror(err)); + return; + } + + err = nl_sock_join_mcgroup(psample_sock, psample_mcgroup); + if (err) { + VLOG_WARN("Failed to join psample mcgroup: %s", ovs_strerror(err)); + nl_sock_destroy(psample_sock); + psample_sock = NULL; + return; + } +} + static int netdev_tc_init_flow_api(struct netdev *netdev) { @@ -3069,6 +3136,7 @@ netdev_tc_init_flow_api(struct netdev *netdev) meter_police_ids = id_pool_create(METER_POLICE_IDS_BASE, METER_POLICE_IDS_MAX - METER_POLICE_IDS_BASE + 1); sample_group_ids = id_pool_create(1, UINT32_MAX - 1); + psample_init(); tc_cleanup_policer_actions(meter_police_ids, METER_POLICE_IDS_BASE, METER_POLICE_IDS_MAX); ovs_mutex_unlock(&meter_police_ids_mutex); @@ -3288,6 +3356,124 @@ meter_tc_del_policer(ofproto_meter_id meter_id, return err; } +struct offload_psample { + struct nlattr *packet; /* Packet data. */ + int group_id; /* Mapping id for sFlow offload. */ + int iifindex; /* Input ifindex. 
*/ +}; + +static int +psample_from_ofpbuf(struct offload_psample *psample, + struct ofpbuf *buf) +{ + static const struct nl_policy ovs_psample_policy[] = { + [PSAMPLE_ATTR_IIFINDEX] = { .type = NL_A_U16 }, + [PSAMPLE_ATTR_SAMPLE_GROUP] = { .type = NL_A_U32 }, + [PSAMPLE_ATTR_GROUP_SEQ] = { .type = NL_A_U32 }, + [PSAMPLE_ATTR_DATA] = { .type = NL_A_UNSPEC }, + }; + struct nlattr *a[ARRAY_SIZE(ovs_psample_policy)]; + struct genlmsghdr *genl; + struct nlmsghdr *nlmsg; + struct ofpbuf b; + + b = ofpbuf_const_initializer(buf->data, buf->size); + nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg); + genl = ofpbuf_try_pull(&b, sizeof *genl); + if (!nlmsg || !genl || nlmsg->nlmsg_type != psample_family + || !nl_policy_parse(&b, 0, ovs_psample_policy, a, + ARRAY_SIZE(ovs_psample_policy))) { + return EINVAL; + } + + psample->iifindex = nl_attr_get_u16(a[PSAMPLE_ATTR_IIFINDEX]); + psample->group_id = nl_attr_get_u32(a[PSAMPLE_ATTR_SAMPLE_GROUP]); + psample->packet = a[PSAMPLE_ATTR_DATA]; + + return 0; +} + +static int +psample_parse_packet(struct offload_psample *psample, + struct offload_sflow *sflow, + struct dpif_upcall *upcall) +{ + dp_packet_use_stub(&upcall->packet, + CONST_CAST(struct nlattr *, + nl_attr_get(psample->packet)) - 1, + nl_attr_get_size(psample->packet) + + sizeof(struct nlattr)); + dp_packet_set_data(&upcall->packet, + (char *) dp_packet_data(&upcall->packet) + + sizeof(struct nlattr)); + dp_packet_set_size(&upcall->packet, nl_attr_get_size(psample->packet)); + + sflow = sflow_find(psample->group_id); + if (!sflow) { + return ENOENT; + } + + upcall->key = NULL; + upcall->key_len = 0; + upcall->ufid = sflow->ufid; + upcall->userdata = sflow->userdata; + upcall->actions = CONST_CAST(struct nlattr *, sflow->actions); + upcall->in_tun = sflow->tunnel; + upcall->iifindex = psample->iifindex; + upcall->type = DPIF_UC_ACTION; + + return 0; +} + +static int +sflow_psample_recv(struct dpif_upcall *upcall, struct ofpbuf *buf) +{ + int read_tries = 0; + + if 
(!psample_sock) { + return ENOENT; + } + + for (;;) { + struct offload_psample psample; + struct offload_sflow sflow; + int error; + + if (++read_tries > 50) { + return EAGAIN; + } + + error = nl_sock_recv(psample_sock, buf, NULL, false); + if (error == ENOBUFS) { + continue; + } + + if (error) { + if (error == EAGAIN) { + break; + } + return error; + } + + error = psample_from_ofpbuf(&psample, buf); + if (!error) { + return psample_parse_packet(&psample, &sflow, upcall); + } else if (error) { + return error; + } + } + + return EAGAIN; +} + +static void +sflow_psample_recv_wait(void) +{ + if (psample_sock) { + nl_sock_wait(psample_sock, POLLIN); + } +} + const struct netdev_flow_api netdev_offload_tc = { .type = "linux_tc", .flow_flush = netdev_tc_flow_flush, @@ -3301,5 +3487,7 @@ const struct netdev_flow_api netdev_offload_tc = { .meter_set = meter_tc_set_policer, .meter_get = meter_tc_get_policer, .meter_del = meter_tc_del_policer, + .sflow_recv = sflow_psample_recv, + .sflow_recv_wait = sflow_psample_recv_wait, .init_flow_api = netdev_tc_init_flow_api, }; -- 2.26.3 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
