"Stokes, Ian" <ian.sto...@intel.com> writes: >> Dpdk port representors were introduced in dpdk versions 18.xx. >> Prior to port representors there was a one-to-one relationship between an >> rte device (e.g. PCI bus) and an eth device (referenced as dpdk port id in >> OVS). With port representors the relationship becomes one-to-many rte >> device to eth devices. >> For example in [3] there are two devices (representors) using the same PCI >> physical address 0000:08:00.0: "0000:08:00.0,representor=[3]" and >> "0000:08:00.0,representor=[5]". >> This commit handles the new one-to-many relationship. For example, when >> one of the device port representors in [3] is closed - the PCI bus cannot >> be detached until the other device port representor is closed as well. OVS >> remains backward compatible by supporting dpdk legacy PCI ports which do >> not include port representors. >> Dpdk port representors related commits are listed in [1]. Dpdk port >> representors documentation appears in [2]. A sample configuration which >> uses two representors ports (the output of "ovs-vsctl show" >> command) is shown in [3]. >> > > Hi Ophir, thanks for the v5. > > A few comments below to be addressed. > >> [1] >> e0cb96204b71 ("net/i40e: add support for representor ports") >> cf80ba6e2038 ("net/ixgbe: add support for representor ports") >> 26c08b979d26 ("net/mlx5: add port representor awareness") >> >> [2] >> doc/guides/prog_guide/switch_representation.rst >> >> [3] >> Bridge "ovs_br0" >> Port "ovs_br0" >> Interface "ovs_br0" >> type: internal >> Port "port-rep3" >> Interface "port-rep3" >> type: dpdk >> options: {dpdk-devargs="0000:08:00.0,representor=[3]"} >> Port "port-rep5" >> Interface "port-rep5" >> type: dpdk >> options: {dpdk-devargs="0000:08:00.0,representor=[5]"} >> ovs_version: "2.10.90" >> >> Signed-off-by: Ophir Munk <ophi...@mellanox.com> >> --- >> v1 (hwol branch): >> 1. rebase on top of Kevin's patch >> dpdk: Update to use DPDK 18.11.[ovs-dev,v7,dpdk-latest,1/1] dpdk: Update >> to use DPDK 18.11. >> https://patchwork.ozlabs.org/patch/1005535/ >> 2. skipping count of sibling ports in case the sibling port state is >> RTE_ETH_DEV_UNUSED >> >> v2 (switching to master branch): >> 1. Update based on review comments: >> https://patchwork.ozlabs.org/patch/1011457/#2051417 >> 2. Send patch on master branch >> >> v3: >> Add a FIXME comment regarding the direct access to DPDK rte_eth_devices >> array >> >> v4: >> 1. Add FIXME comment to every direct access to DPDK rte_eth_devices array >> 2. Do not probe unconditionally during add-port. Instead see if a device >> has already been probed, and if so skip the rte_dev_probe(devargs) >> >> v5: >> 1. Updates following v4 reviews >> 2. Handle cases where flag RTE_ETH_DEV_CLOSE_REMOVE is not supported >> (for PMDs not supporting representors) >> >> lib/netdev-dpdk.c | 174 ++++++++++++++++++++++++++++++++++++++++++------- >> ----- >> 1 file changed, 137 insertions(+), 37 deletions(-) >> >> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 320422b..6099564 >> 100644 >> --- a/lib/netdev-dpdk.c >> +++ b/lib/netdev-dpdk.c >> @@ -1218,6 +1218,26 @@ dpdk_dev_parse_name(const char dev_name[], const >> char prefix[], >> } >> } >> >> +/* Get the number of OVS interfaces which have the same DPDK >> + * rte device (e.g. same pci bus address). >> + * FIXME: avoid direct access to DPDK internal array rte_eth_devices. >> + */ >> +static int >> +netdev_dpdk_get_num_ports(struct rte_device *device) >> + OVS_REQUIRES(dpdk_mutex) >> +{ >> + struct netdev_dpdk *dev; >> + int count = 0; >> + >> + LIST_FOR_EACH (dev, list_node, &dpdk_list) { >> + if (rte_eth_devices[dev->port_id].device == device >> + && rte_eth_devices[dev->port_id].state != RTE_ETH_DEV_UNUSED) { >> + count++; >> + } >> + } >> + return count; >> +} >> + >> static int >> vhost_common_construct(struct netdev *netdev) >> OVS_REQUIRES(dpdk_mutex) >> @@ -1353,7 +1373,8 @@ static void >> netdev_dpdk_destruct(struct netdev *netdev) { >> struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); >> - struct rte_eth_dev_info dev_info; >> + struct rte_device *rte_dev; >> + struct rte_eth_dev *eth_dev; >> >> ovs_mutex_lock(&dpdk_mutex); >> >> @@ -1361,12 +1382,32 @@ netdev_dpdk_destruct(struct netdev *netdev) >> dev->started = false; >> >> if (dev->attached) { >> + /* Retrieve eth device data before closing it. >> + * FIXME: avoid direct access to DPDK internal array >> rte_eth_devices. >> + */ >> + eth_dev = &rte_eth_devices[dev->port_id]; >> + uint32_t remove_on_close = >> + eth_dev->data->dev_flags & RTE_ETH_DEV_CLOSE_REMOVE; >> + rte_dev = eth_dev->device; >> + >> + /* Remove the eth device. */ >> rte_eth_dev_close(dev->port_id); >> - rte_eth_dev_info_get(dev->port_id, &dev_info); >> - if (dev_info.device && !rte_dev_remove(dev_info.device)) { >> - VLOG_INFO("Device '%s' has been detached", dev->devargs); >> + >> + /* Remove this rte device and all its eth devices if flag >> + * RTE_ETH_DEV_CLOSE_REMOVE is not supported (which means >> representors >> + * are not supported), or if all the eth devices belonging to the >> rte >> + * device are closed. >> + */ >> + if (!remove_on_close || !netdev_dpdk_get_num_ports(rte_dev)) { >> + if (rte_dev_remove(rte_dev) < 0) { >> + VLOG_ERR("Device '%s' can not be detached", dev- >> >devargs); >> + } else { >> + /* Device was closed and detached. */ >> + VLOG_INFO("Device '%s' has been detached", dev->devargs); >> + } >> } else { >> - VLOG_ERR("Device '%s' can not be detached", dev->devargs); >> + /* Device was only closed. rte_dev_remove() was not called. >> */ >> + VLOG_INFO("Device '%s' has been removed", dev->devargs); >> } >> } >> >> @@ -1632,8 +1673,54 @@ netdev_dpdk_get_port_by_mac(const char *mac_str) >> return DPDK_ETH_PORT_ID_INVALID; >> } >> >> +/* Return the first DPDK port_id matching the devargs pattern. */ >> +static dpdk_port_t netdev_dpdk_probe_port_by_devargs(const char >> +*devargs) >> + OVS_REQUIRES(dpdk_mutex) >> +{ >> + struct rte_dev_iterator iterator; >> + dpdk_port_t port_id; >> + >> + RTE_ETH_FOREACH_MATCHING_DEV (port_id, devargs, &iterator) { >> + /* If a break is done - must call rte_eth_iterator_cleanup. */ >> + rte_eth_iterator_cleanup(&iterator); >> + break; >> + } >> + /* The port may be invalid if it was not probed */ > > Minor, period at the end of the comment above. >> + if (port_id == DPDK_ETH_PORT_ID_INVALID) { >> + if (rte_dev_probe(devargs)) { >> + port_id = DPDK_ETH_PORT_ID_INVALID; >> + } else { >> + RTE_ETH_FOREACH_MATCHING_DEV (port_id, devargs, &iterator) { >> + /* If a break is done - must call >> rte_eth_iterator_cleanup. */ >> + rte_eth_iterator_cleanup(&iterator); >> + break; >> + } >> + } >> + } >> + >> + return port_id; >> +} >> + >> +static int >> +netdev_dpdk_check_dup_dev(dpdk_port_t port_id, struct netdev *netdev, >> + const char *devargs, char **errp) { >> + struct netdev_dpdk *dup_dev; >> + >> + dup_dev = netdev_dpdk_lookup_by_port_id(port_id); > CLANG requires dpdk_mutex to be held above when calling > netdev_dpdk_lookup_by_port_id() above.
It's a missing annotation, I think. Function requires: + OVS_REQUIRES(dpdk_mutex) In all the paths that I can see it called, it is being called with dpdk_mutex held. But there isn't any semantic enforcement, and the function itself doesn't take the mutex. > lib/netdev-dpdk.c:1710:15: error: calling function > 'netdev_dpdk_lookup_by_port_id' requires holding mutex 'dpdk_mutex' > exclusively [-Werror,-Wthread-safety-analysis] > dup_dev = netdev_dpdk_lookup_by_port_id(port_id); > > >> + if (dup_dev) { >> + VLOG_WARN_BUF(errp, "'%s' is trying to use device '%s' " >> + "which is already in use by '%s'", >> + netdev_get_name(netdev), devargs, >> + netdev_get_name(&dup_dev->up)); >> + return EADDRINUSE; >> + } >> + return 0; >> +} >> + >> /* >> - * Normally, a PCI id is enough for identifying a specific DPDK port. >> + * Normally, a PCI id (optionally followed by a representor number) >> + * is enough for identifying a specific DPDK port. >> * However, for some NICs having multiple ports sharing the same PCI >> * id, using PCI id won't work then. >> * >> @@ -1641,32 +1728,35 @@ netdev_dpdk_get_port_by_mac(const char *mac_str) >> * >> * Note that the compatibility is fully kept: user can still use the >> * PCI id for adding ports (when it's enough for them). >> + * >> + * In order to avoid clang errors add OVS_REQUIRES(dpdk_mutex) to this >> + function >> + * since it is required by netdev_dpdk_lookup_by_port_id() while the >> + dpdk_mutex >> + * is taken outside of this function. I don't think the comment is needed. The function is required to be called with dpdk_mutex held. Not just to avoid clang errors, but because it is a part of the semantic of netdev_dpdk_lookup_by_port_id(). >> */ >> static dpdk_port_t >> netdev_dpdk_process_devargs(struct netdev_dpdk *dev, >> const char *devargs, char **errp) >> + OVS_REQUIRES(dpdk_mutex) >> { >> - char *name; >> dpdk_port_t new_port_id = DPDK_ETH_PORT_ID_INVALID; >> >> if (strncmp(devargs, "class=eth,mac=", 14) == 0) { >> new_port_id = netdev_dpdk_get_port_by_mac(&devargs[14]); >> } else { >> - name = xmemdup0(devargs, strcspn(devargs, ",")); >> - if (rte_eth_dev_get_port_by_name(name, &new_port_id) >> - || !rte_eth_dev_is_valid_port(new_port_id)) { >> - /* Device not found in DPDK, attempt to attach it */ >> - if (!rte_dev_probe(devargs) >> - && !rte_eth_dev_get_port_by_name(name, &new_port_id)) { >> - /* Attach successful */ >> - dev->attached = true; >> - VLOG_INFO("Device '%s' attached to DPDK", devargs); >> + new_port_id = netdev_dpdk_probe_port_by_devargs(devargs); >> + if (!rte_eth_dev_is_valid_port(new_port_id)) { >> + new_port_id = DPDK_ETH_PORT_ID_INVALID; >> + } else { >> + if (netdev_dpdk_check_dup_dev(new_port_id, &dev->up, >> + devargs, errp)) { >> + new_port_id = DPDK_ETH_PORT_ID_INVALID; >> } else { >> - /* Attach unsuccessful */ >> - new_port_id = DPDK_ETH_PORT_ID_INVALID; >> + /* Device successfully found. */ >> + dev->attached = true; >> + VLOG_INFO("Device '%s' attached to DPDK port %d", >> + devargs, new_port_id); >> } >> } >> - free(name); >> } >> >> if (new_port_id == DPDK_ETH_PORT_ID_INVALID) { @@ -1756,16 +1846,9 @@ >> netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args, >> /* Already configured, do not reconfigure again */ >> err = 0; >> } else { >> - struct netdev_dpdk *dup_dev; >> - >> - dup_dev = netdev_dpdk_lookup_by_port_id(new_port_id); >> - if (dup_dev) { >> - VLOG_WARN_BUF(errp, "'%s' is trying to use device >> '%s' " >> - "which is already in use by >> '%s'", >> - netdev_get_name(netdev), new_devargs, >> - netdev_get_name(&dup_dev->up)); >> - err = EADDRINUSE; >> - } else { >> + err = netdev_dpdk_check_dup_dev(new_port_id, netdev, >> + new_devargs, errp); > > This introduces a second call to netdev_dpdk_check_dup_dev(), I thought that > this is already called as part of netdev_dpdk_process_devargs(), is this > second > call here required? When would this case hit? > > It seems to be specific to when the 'class=eth,mac=" devargs are > passed, then netdev_dpdk_check_dup_dev() > won't be checked in netdev_dpdk_process_devargs() and instead is > checked here after netdev_dpdk_process_devargs() returns a new_port_id > that is not invalid and is not the existing port id. > > I think a comment to clarify would be welcome here. > > Ian > >> + if (!err) { >> int sid = rte_eth_dev_socket_id(new_port_id); >> >> dev->requested_socket_id = sid < 0 ? SOCKET0 : sid; >> @@ -1773,7 +1856,6 @@ netdev_dpdk_set_config(struct netdev *netdev, const >> struct smap *args, >> dev->port_id = new_port_id; >> netdev_request_reconfigure(&dev->up); >> netdev_dpdk_clear_xstats(dev); >> - err = 0; >> } >> } >> } >> @@ -3236,11 +3318,18 @@ netdev_dpdk_detach(struct unixctl_conn *conn, int >> argc OVS_UNUSED, >> char *response; >> dpdk_port_t port_id; >> struct netdev_dpdk *dev; >> - struct rte_eth_dev_info dev_info; >> + struct rte_device *rte_dev; >> + struct rte_eth_dev *eth_dev; >> + struct rte_dev_iterator iterator; >> >> ovs_mutex_lock(&dpdk_mutex); >> >> - if (rte_eth_dev_get_port_by_name(argv[1], &port_id)) { >> + RTE_ETH_FOREACH_MATCHING_DEV (port_id, argv[1], &iterator) { >> + /* If a break is done - must call rte_eth_iterator_cleanup. */ >> + rte_eth_iterator_cleanup(&iterator); >> + break; >> + } >> + if (port_id == DPDK_ETH_PORT_ID_INVALID) { >> response = xasprintf("Device '%s' not found in DPDK", argv[1]); >> goto error; >> } >> @@ -3253,15 +3342,26 @@ netdev_dpdk_detach(struct unixctl_conn *conn, int >> argc OVS_UNUSED, >> goto error; >> } >> >> - rte_eth_dev_close(port_id); >> + /* FIXME: avoid direct access to DPDK internal array rte_eth_devices. >> */ >> + eth_dev = &rte_eth_devices[port_id]; >> + rte_dev = eth_dev->device; >> + >> + if ((eth_dev->data->dev_flags & RTE_ETH_DEV_CLOSE_REMOVE) && >> + (netdev_dpdk_get_num_ports(rte_dev) > 1)) { >> + response = xasprintf("Device '%s' is being shared with other " >> + "interfaces. Remove them before detaching.", >> + argv[1]); >> + goto error; >> + } >> >> - rte_eth_dev_info_get(port_id, &dev_info); >> - if (!dev_info.device || rte_dev_remove(dev_info.device)) { >> - response = xasprintf("Device '%s' can not be detached", argv[1]); >> + rte_eth_dev_close(port_id); >> + if (rte_dev_remove(rte_dev) < 0) { >> + response = xasprintf("Device '%s' can not be removed", >> + argv[1]); >> goto error; >> } >> >> - response = xasprintf("Device '%s' has been detached", argv[1]); >> + response = xasprintf("All devices shared with device '%s' " >> + "have been detached", argv[1]); >> >> ovs_mutex_unlock(&dpdk_mutex); >> unixctl_command_reply(conn, response); >> -- >> 1.8.3.1 > > _______________________________________________ > dev mailing list > d...@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev