From: Roni Bar Yanai <[email protected]> vport offloaded functions should have a different implementation for kernel based OVS versus dpdk based OVS. Currently there is an unconditional execution of a kernel based calls even if the vport was added by dpif-netdev rather than by dpif-netlink. Before this commit and in case hw-offload=true adding a netdev datapath vport resulted in an error since the vport kernel based APIs were called. In practice the API returned immediately on a get_ifindex() failure (see [1]), which caused no harm, but it is required to avoid such scenarios in advance. In case of a netdev datapath vport flow functions must be updated at runtime. This commit reassigns flow functions to NULL when such a vport is added. It uses a duplicated vport class to keep the original default kernel vport class intact. This enables using vports in a mixed environment of kernel and netdev (userspace) bridges at the same time.
[1] ovs|00002|netdev_tc_offloads(dp_netdev_flow_5)|ERR|flow_put: failed to get ifindex for vxlan_sys_4789: No such device Reviewed-by: Asaf Penso <[email protected]> Co-authored-by: Ophir Munk <[email protected]> Signed-off-by: Ophir Munk <[email protected]> Signed-off-by: Roni Bar Yanai <[email protected]> --- v1: First version v2: Call netdev_rte_offloads_port_add() (implemented in file netdev-rte-offloads.c) under #ifdef DPDK_NETDEV lib/dpif-netdev.c | 4 + lib/netdev-rte-offloads.c | 47 +++++++++++ lib/netdev-rte-offloads.h | 5 ++ lib/netdev-vport.c | 211 ++++++++++++++++++++++++++++------------------ lib/netdev-vport.h | 4 + 5 files changed, 190 insertions(+), 81 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index c20f875..ba88d97 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -51,6 +51,7 @@ #include "latch.h" #include "netdev.h" #include "netdev-provider.h" +#include "netdev-rte-offloads.h" #include "netdev-vport.h" #include "netlink.h" #include "odp-execute.h" @@ -1865,6 +1866,9 @@ dpif_netdev_port_add(struct dpif *dpif, struct netdev *netdev, if (!error) { *port_nop = port_no; error = do_add_port(dp, dpif_port, netdev_get_type(netdev), port_no); +#ifdef DPDK_NETDEV + netdev_rte_offloads_port_add(netdev); +#endif } ovs_mutex_unlock(&dp->port_mutex); diff --git a/lib/netdev-rte-offloads.c b/lib/netdev-rte-offloads.c index e9ab086..85f01e5 100644 --- a/lib/netdev-rte-offloads.c +++ b/lib/netdev-rte-offloads.c @@ -22,6 +22,7 @@ #include "cmap.h" #include "dpif-netdev.h" #include "netdev-provider.h" +#include "netdev-vport.h" #include "openvswitch/match.h" #include "openvswitch/vlog.h" #include "packets.h" @@ -731,3 +732,49 @@ netdev_rte_offloads_flow_del(struct netdev *netdev, const ovs_u128 *ufid, return netdev_rte_offloads_destroy_flow(netdev, ufid, rte_flow); } + +/* + * Vport netdev flow pointers are initialized by default to kernel calls. + * They should be nullified or be set to a valid netdev (userspace) calls. + */ +#define NULLIFY(f) (ndc->f = NULL) +static void +netdev_rte_offloads_vxlan_init(struct netdev *netdev) +{ + /* + * Clone default function pointers some + * of which may be kernel flow pointers. + */ + struct netdev_class *ndc = netdev_vport_dup_class_once(netdev); + if (!ndc) { + VLOG_DBG("Vport netdev_class offload api cannot be updated."); + return; + } + + /* Override kernel flow pointers. */ + NULLIFY(flow_put); + NULLIFY(flow_flush); + NULLIFY(flow_dump_create); + NULLIFY(flow_dump_destroy); + NULLIFY(flow_dump_next); + NULLIFY(flow_put); + NULLIFY(flow_get); + NULLIFY(flow_del); + NULLIFY(init_flow_api); + + netdev_vport_update_class(netdev, ndc); +} + +/* + * This function is called as part of adding a new dpif netdev port. + * In case of vport class of "vxlan" type we update it to match netdev + * datapath apis. + */ +void +netdev_rte_offloads_port_add(struct netdev *netdev) +{ + const char *type = netdev_get_type(netdev); + if (!strcmp("vxlan", type)) { + netdev_rte_offloads_vxlan_init(netdev); + } +} diff --git a/lib/netdev-rte-offloads.h b/lib/netdev-rte-offloads.h index 18c8a75..7fbe414 100644 --- a/lib/netdev-rte-offloads.h +++ b/lib/netdev-rte-offloads.h @@ -50,6 +50,11 @@ int netdev_rte_offloads_flow_put(struct netdev *netdev, struct match *match, int netdev_rte_offloads_flow_del(struct netdev *netdev, const ovs_u128 *ufid, struct dpif_flow_stats *stats); +/* + * Called by dpif netdev when a port is added + */ +void netdev_rte_offloads_port_add(struct netdev *netdev); + #define DPDK_FLOW_OFFLOAD_API \ .flow_put = netdev_rte_offloads_flow_put, \ .flow_del = netdev_rte_offloads_flow_del diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c index ab59166..7634133 100644 --- a/lib/netdev-vport.c +++ b/lib/netdev-vport.c @@ -1143,90 +1143,95 @@ netdev_vport_get_ifindex(const struct netdev *netdev_) .get_tunnel_config = get_netdev_tunnel_config, \ .get_status = tunnel_get_status +/* The name of the dpif_port should be short enough to accomodate adding + * a port number to the end if one is necessary. */ +static struct vport_class vport_classes[] = { + { "genev_sys", + { + TUNNEL_FUNCTIONS_COMMON, + .type = "geneve", + .build_header = netdev_geneve_build_header, + .push_header = netdev_tnl_push_udp_header, + .pop_header = netdev_geneve_pop_header, + .get_ifindex = NETDEV_VPORT_GET_IFINDEX, + }, + {{NULL, NULL, 0, 0}} + }, + { "gre_sys", + { + TUNNEL_FUNCTIONS_COMMON, + .type = "gre", + .build_header = netdev_gre_build_header, + .push_header = netdev_gre_push_header, + .pop_header = netdev_gre_pop_header, + .get_ifindex = NETDEV_VPORT_GET_IFINDEX, + }, + {{NULL, NULL, 0, 0}} + }, + { "vxlan_sys", + { + TUNNEL_FUNCTIONS_COMMON, + .type = "vxlan", + .build_header = netdev_vxlan_build_header, + .push_header = netdev_tnl_push_udp_header, + .pop_header = netdev_vxlan_pop_header, + .get_ifindex = NETDEV_VPORT_GET_IFINDEX + }, + {{NULL, NULL, 0, 0}} + }, + { "lisp_sys", + { + TUNNEL_FUNCTIONS_COMMON, + .type = "lisp" + }, + {{NULL, NULL, 0, 0}} + }, + { "stt_sys", + { + TUNNEL_FUNCTIONS_COMMON, + .type = "stt" + }, + {{NULL, NULL, 0, 0}} + }, + { "erspan_sys", + { + TUNNEL_FUNCTIONS_COMMON, + .type = "erspan", + .build_header = netdev_erspan_build_header, + .push_header = netdev_erspan_push_header, + .pop_header = netdev_erspan_pop_header + }, + {{NULL, NULL, 0, 0}} + }, + { "ip6erspan_sys", + { + TUNNEL_FUNCTIONS_COMMON, + .type = "ip6erspan", + .build_header = netdev_erspan_build_header, + .push_header = netdev_erspan_push_header, + .pop_header = netdev_erspan_pop_header + }, + {{NULL, NULL, 0, 0}} + }, + { "ip6gre_sys", + { + TUNNEL_FUNCTIONS_COMMON, + .type = "ip6gre", + .build_header = netdev_gre_build_header, + .push_header = netdev_gre_push_header, + .pop_header = netdev_gre_pop_header + }, + {{NULL, NULL, 0, 0}} + }, +}; + +#define NUM_VPORT_CLASSES (sizeof vport_classes / sizeof vport_classes[0]) + +static struct vport_class dup_vport_classes[NUM_VPORT_CLASSES]; + void netdev_vport_tunnel_register(void) { - /* The name of the dpif_port should be short enough to accomodate adding - * a port number to the end if one is necessary. */ - static struct vport_class vport_classes[] = { - { "genev_sys", - { - TUNNEL_FUNCTIONS_COMMON, - .type = "geneve", - .build_header = netdev_geneve_build_header, - .push_header = netdev_tnl_push_udp_header, - .pop_header = netdev_geneve_pop_header, - .get_ifindex = NETDEV_VPORT_GET_IFINDEX, - }, - {{NULL, NULL, 0, 0}} - }, - { "gre_sys", - { - TUNNEL_FUNCTIONS_COMMON, - .type = "gre", - .build_header = netdev_gre_build_header, - .push_header = netdev_gre_push_header, - .pop_header = netdev_gre_pop_header, - .get_ifindex = NETDEV_VPORT_GET_IFINDEX, - }, - {{NULL, NULL, 0, 0}} - }, - { "vxlan_sys", - { - TUNNEL_FUNCTIONS_COMMON, - .type = "vxlan", - .build_header = netdev_vxlan_build_header, - .push_header = netdev_tnl_push_udp_header, - .pop_header = netdev_vxlan_pop_header, - .get_ifindex = NETDEV_VPORT_GET_IFINDEX - }, - {{NULL, NULL, 0, 0}} - }, - { "lisp_sys", - { - TUNNEL_FUNCTIONS_COMMON, - .type = "lisp" - }, - {{NULL, NULL, 0, 0}} - }, - { "stt_sys", - { - TUNNEL_FUNCTIONS_COMMON, - .type = "stt" - }, - {{NULL, NULL, 0, 0}} - }, - { "erspan_sys", - { - TUNNEL_FUNCTIONS_COMMON, - .type = "erspan", - .build_header = netdev_erspan_build_header, - .push_header = netdev_erspan_push_header, - .pop_header = netdev_erspan_pop_header - }, - {{NULL, NULL, 0, 0}} - }, - { "ip6erspan_sys", - { - TUNNEL_FUNCTIONS_COMMON, - .type = "ip6erspan", - .build_header = netdev_erspan_build_header, - .push_header = netdev_erspan_push_header, - .pop_header = netdev_erspan_pop_header - }, - {{NULL, NULL, 0, 0}} - }, - { "ip6gre_sys", - { - TUNNEL_FUNCTIONS_COMMON, - .type = "ip6gre", - .build_header = netdev_gre_build_header, - .push_header = netdev_gre_push_header, - .pop_header = netdev_gre_pop_header - }, - {{NULL, NULL, 0, 0}} - }, - }; static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; if (ovsthread_once_start(&once)) { @@ -1237,6 +1242,8 @@ netdev_vport_tunnel_register(void) netdev_register_provider(&vport_classes[i].netdev_class); } + memset(dup_vport_classes, 0, sizeof dup_vport_classes); + unixctl_command_register("tnl/egress_port_range", "min max", 0, 2, netdev_tnl_egress_port_range, NULL); @@ -1259,3 +1266,45 @@ netdev_vport_patch_register(void) simap_init(&patch_class.global_cfg_tracker); netdev_register_provider(&patch_class.netdev_class); } + +/* + * This functions receives a netdev pointer which contains default + * netdev_class fields (such as kernel flow functions pointers). + * It returns a duplicated netdev_class that can be overriden by the caller + */ +struct netdev_class * +netdev_vport_dup_class_once(struct netdev *netdev) +{ + unsigned int i; + struct vport_class *vpclass; + struct netdev_class *ndclass = NULL; + + if (!is_vport_class(netdev->netdev_class)) { + goto out; + } + + /* Get the vport_class which contains the netdev_class. */ + vpclass = vport_class_cast(netdev_get_class(netdev)); + /* Calculate vpclass entry index in the array 'vport_classes' */ + i = vpclass - &vport_classes[0]; + if (i >= NUM_VPORT_CLASSES) { + goto out; + } + /* Entry duplication should be done only once */ + if (!dup_vport_classes[i].dpif_port) { + memcpy(&dup_vport_classes[i], vpclass, sizeof *vpclass); + } + /* Get the duplicated netdev_class pointer */ + ndclass = &dup_vport_classes[i].netdev_class; +out: + return ndclass; +} + +void +netdev_vport_update_class(struct netdev *netdev, struct netdev_class *ndclass) +{ + if (is_vport_class(netdev->netdev_class)) { + netdev->netdev_class = ndclass; + } +} + diff --git a/lib/netdev-vport.h b/lib/netdev-vport.h index 9d756a2..d100f76 100644 --- a/lib/netdev-vport.h +++ b/lib/netdev-vport.h @@ -42,6 +42,10 @@ void netdev_vport_inc_tx(const struct netdev *, bool netdev_vport_is_vport_class(const struct netdev_class *); const char *netdev_vport_class_get_dpif_port(const struct netdev_class *); +struct netdev_class *netdev_vport_dup_class_once(struct netdev *netdev); +void netdev_vport_update_class(struct netdev *netdev, + struct netdev_class *ndclass); + #ifndef _WIN32 enum { NETDEV_VPORT_NAME_BUFSIZE = 16 }; #else -- 1.8.3.1 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
