On 4/21/23 17:16, Adrian Moreno wrote: > Currently, the netdev's speed is being calculated by taking the link's > feature bits (using netdev_get_features()) and transforming them into > bps. > > This mechanism can be both inaccurate and difficult to maintain, mainly > because we currently use the feature bits supported by OpenFlow which > would have to be extended to support all new feature bits of all netdev > implementations while keeping the OpenFlow API intact. > > In order to expose the link speed accurately for all current and future > hardware, add a new netdev API call that allows the implementations to > provide the current and maximum link speeds in Mbps. > > Internally, the logic to get the maximum supported speed still relies on > feature bits so it might still get out of sync in the future. However, > the maximum configurable speed is not used as much as the current speed > and these feature bits are not exposed through the netdev interface so > it should be easier to add more. > > Use this new function instead of netdev_get_features() where the link > speed is needed. > > As a consecuence of this patch, link speeds of cards is properly > reported (internally in OVSDB) even if not supported by OpenFlow.
Hi, Adrian. Thanks for the set! I didn't test it, but I have a few comments inline. Best regards, Ilya Maximets. > > Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2137567 I'm not sure about this link. The BZ talks about switching to ETHTOOL_GLINKSETTINGS interface to get speeds that do not have predefined values. Current patch is a step forward to be able to support other link speeds, but IIUC, we will still not able to detect 25 and 50 Gbps links in netdev-linux. > Signed-off-by: Adrian Moreno <[email protected]> > --- > include/openvswitch/netdev.h | 1 + > lib/dpif.h | 4 ++- > lib/netdev-dpdk.c | 48 ++++++++++++++++++++++++++++++++++++ > lib/netdev-linux-private.h | 1 + > lib/netdev-linux.c | 44 ++++++++++++++++++++++++++------- > lib/netdev-provider.h | 9 +++++++ > lib/netdev.c | 23 +++++++++++++++++ > ofproto/ofproto-dpif-sflow.c | 10 ++++++-- > ofproto/ofproto.c | 6 +++-- > vswitchd/bridge.c | 30 +++++++++++++--------- > 10 files changed, 151 insertions(+), 25 deletions(-) > > diff --git a/include/openvswitch/netdev.h b/include/openvswitch/netdev.h > index cafd6fd7b..83e8633dd 100644 > --- a/include/openvswitch/netdev.h > +++ b/include/openvswitch/netdev.h > @@ -132,6 +132,7 @@ int netdev_get_features(const struct netdev *, > enum netdev_features *advertised, > enum netdev_features *supported, > enum netdev_features *peer); > +int netdev_get_speed(const struct netdev *, uint32_t *current, uint32_t > *max); > uint64_t netdev_features_to_bps(enum netdev_features features, > uint64_t default_bps); > bool netdev_features_is_full_duplex(enum netdev_features features); > diff --git a/lib/dpif.h b/lib/dpif.h > index 129cbf6a1..9e9d0aa1b 100644 > --- a/lib/dpif.h > +++ b/lib/dpif.h > @@ -91,7 +91,9 @@ > * > * - Carrier status (netdev_get_carrier()). > * > - * - Speed (netdev_get_features()). > + * - Link features (netdev_get_features()). > + * > + * - Speed (netdev_get_speed()). > * > * - QoS queue configuration (netdev_get_queue(), netdev_set_queue() and > * related functions.) > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index fb0dd43f7..b136f25a4 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -3505,6 +3505,53 @@ netdev_dpdk_get_features(const struct netdev *netdev, > return 0; > } > > +static int > +netdev_dpdk_get_speed(const struct netdev *netdev, uint32_t *current, > + uint32_t *max) > +{ > + struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > + struct rte_eth_link link; > + struct rte_eth_dev_info dev_info; Reverse x-mass tree, please. > + > + ovs_mutex_lock(&dev->mutex); > + link = dev->link; > + rte_eth_dev_info_get(dev->port_id, &dev_info); > + ovs_mutex_unlock(&dev->mutex); > + > + *current = link.link_speed != RTE_ETH_SPEED_NUM_UNKNOWN > + ? link.link_speed : 0; > + > + if (dev_info.speed_capa & RTE_ETH_LINK_SPEED_200G) { > + *max = RTE_ETH_SPEED_NUM_200G; > + } else if (dev_info.speed_capa & RTE_ETH_LINK_SPEED_100G) { > + *max = RTE_ETH_SPEED_NUM_100G; > + } else if (dev_info.speed_capa & RTE_ETH_LINK_SPEED_56G) { > + *max = RTE_ETH_SPEED_NUM_56G; > + } else if (dev_info.speed_capa & RTE_ETH_LINK_SPEED_50G) { > + *max = RTE_ETH_SPEED_NUM_50G; Should there be 25 and 40 as well? > + } else if (dev_info.speed_capa & RTE_ETH_LINK_SPEED_20G) { > + *max = RTE_ETH_SPEED_NUM_20G; > + } else if (dev_info.speed_capa & RTE_ETH_LINK_SPEED_10G) { > + *max = RTE_ETH_SPEED_NUM_10G; > + } else if (dev_info.speed_capa & RTE_ETH_LINK_SPEED_5G) { > + *max = RTE_ETH_SPEED_NUM_5G; > + } else if (dev_info.speed_capa & RTE_ETH_LINK_SPEED_2_5G) { > + *max = RTE_ETH_SPEED_NUM_2_5G; > + } else if (dev_info.speed_capa & RTE_ETH_LINK_SPEED_1G) { > + *max = RTE_ETH_SPEED_NUM_1G; > + } else if (dev_info.speed_capa & RTE_ETH_LINK_SPEED_100M || > + dev_info.speed_capa & RTE_ETH_LINK_SPEED_100M_HD) { > + *max = RTE_ETH_SPEED_NUM_100M; > + } else if (dev_info.speed_capa & RTE_ETH_LINK_SPEED_10M || > + dev_info.speed_capa & RTE_ETH_LINK_SPEED_10M_HD) { > + *max = RTE_ETH_SPEED_NUM_10M; > + } else { > + *max = 0; > + } > + > + return 0; > +} > + > static struct ingress_policer * > netdev_dpdk_policer_construct(uint32_t rate, uint32_t burst) > { > @@ -5809,6 +5856,7 @@ parse_vhost_config(const struct smap *ovs_other_config) > .get_stats = netdev_dpdk_get_stats, \ > .get_custom_stats = netdev_dpdk_get_custom_stats, \ > .get_features = netdev_dpdk_get_features, \ > + .get_speed = netdev_dpdk_get_speed, \ > .get_status = netdev_dpdk_get_status, \ > .reconfigure = netdev_dpdk_reconfigure, \ > .rxq_recv = netdev_dpdk_rxq_recv > diff --git a/lib/netdev-linux-private.h b/lib/netdev-linux-private.h > index deb015bdb..39508090c 100644 > --- a/lib/netdev-linux-private.h > +++ b/lib/netdev-linux-private.h > @@ -92,6 +92,7 @@ struct netdev_linux { > enum netdev_features current; /* Cached from ETHTOOL_GSET. */ > enum netdev_features advertised; /* Cached from ETHTOOL_GSET. */ > enum netdev_features supported; /* Cached from ETHTOOL_GSET. */ > + uint32_t speed; /* Cached from ETHTOOL_GSET. */ Might make sense to name it 'current_speed' ? > > struct ethtool_drvinfo drvinfo; /* Cached from ETHTOOL_GDRVINFO. */ > struct tc *tc; > diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c > index 36620199e..576adbf3f 100644 > --- a/lib/netdev-linux.c > +++ b/lib/netdev-linux.c > @@ -2348,7 +2348,6 @@ static void > netdev_linux_read_features(struct netdev_linux *netdev) > { > struct ethtool_cmd ecmd; > - uint32_t speed; > int error; > > if (netdev->cache_valid & VALID_FEATURES) { > @@ -2462,20 +2461,20 @@ netdev_linux_read_features(struct netdev_linux > *netdev) > } > > /* Current settings. */ > - speed = ethtool_cmd_speed(&ecmd); > - if (speed == SPEED_10) { > + netdev->speed = ethtool_cmd_speed(&ecmd); > + if (netdev->speed == SPEED_10) { > netdev->current = ecmd.duplex ? NETDEV_F_10MB_FD : NETDEV_F_10MB_HD; > - } else if (speed == SPEED_100) { > + } else if (netdev->speed == SPEED_100) { > netdev->current = ecmd.duplex ? NETDEV_F_100MB_FD : > NETDEV_F_100MB_HD; > - } else if (speed == SPEED_1000) { > + } else if (netdev->speed == SPEED_1000) { > netdev->current = ecmd.duplex ? NETDEV_F_1GB_FD : NETDEV_F_1GB_HD; > - } else if (speed == SPEED_10000) { > + } else if (netdev->speed == SPEED_10000) { > netdev->current = NETDEV_F_10GB_FD; > - } else if (speed == 40000) { > + } else if (netdev->speed == 40000) { > netdev->current = NETDEV_F_40GB_FD; > - } else if (speed == 100000) { > + } else if (netdev->speed == 100000) { > netdev->current = NETDEV_F_100GB_FD; > - } else if (speed == 1000000) { > + } else if (netdev->speed == 1000000) { > netdev->current = NETDEV_F_1TB_FD; > } else { > netdev->current = 0; > @@ -2529,6 +2528,31 @@ exit: > return error; > } > > +static int > +netdev_linux_get_speed(const struct netdev *netdev_, uint32_t *current, > + uint32_t *max) > +{ > + struct netdev_linux *netdev = netdev_linux_cast(netdev_); > + int error; > + > + ovs_mutex_lock(&netdev->mutex); > + if (netdev_linux_netnsid_is_remote(netdev)) { > + error = EOPNOTSUPP; > + goto exit; > + } > + > + netdev_linux_read_features(netdev); > + if (!netdev->get_features_error) { > + *current = netdev->speed == SPEED_UNKNOWN ? 0 : netdev->speed; > + *max = netdev_features_to_bps(netdev->supported, 0) / 1000000ULL; > + } > + error = netdev->get_features_error; > + > +exit: > + ovs_mutex_unlock(&netdev->mutex); > + return error; > +} > + > /* Set the features advertised by 'netdev' to 'advertise'. */ > static int > netdev_linux_set_advertisements(struct netdev *netdev_, > @@ -3655,6 +3679,7 @@ const struct netdev_class netdev_linux_class = { > .destruct = netdev_linux_destruct, > .get_stats = netdev_linux_get_stats, > .get_features = netdev_linux_get_features, > + .get_speed = netdev_linux_get_speed, > .get_status = netdev_linux_get_status, > .get_block_id = netdev_linux_get_block_id, > .send = netdev_linux_send, > @@ -3671,6 +3696,7 @@ const struct netdev_class netdev_tap_class = { > .destruct = netdev_linux_destruct, > .get_stats = netdev_tap_get_stats, > .get_features = netdev_linux_get_features, > + .get_speed = netdev_linux_get_speed, > .get_status = netdev_linux_get_status, > .send = netdev_linux_send, > .rxq_construct = netdev_linux_rxq_construct, > diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h > index b5420947d..cf17aaea9 100644 > --- a/lib/netdev-provider.h > +++ b/lib/netdev-provider.h > @@ -500,6 +500,15 @@ struct netdev_class { > enum netdev_features *supported, > enum netdev_features *peer); > > + /* Stores the current and maximum supported link speed by 'netdev' into > + * each of '*current' and '*max'. Each value represents the speed in > Mbps. > + * If any of the speeds is unknown, a zero value must be returned. s/returned/stored/ ? > + * > + * This function may be set to null if it would always return EOPNOTSUPP. > + */ > + int (*get_speed)(const struct netdev *netdev, uint32_t *current, > + uint32_t *max); > + > /* Set the features advertised by 'netdev' to 'advertise', which is a > * set of NETDEV_F_* bits. > * > diff --git a/lib/netdev.c b/lib/netdev.c > index c79778378..aea4e2d46 100644 > --- a/lib/netdev.c > +++ b/lib/netdev.c > @@ -1167,6 +1167,29 @@ netdev_get_features(const struct netdev *netdev, > return error; > } > > +int > +netdev_get_speed(const struct netdev *netdev, uint32_t *current, uint32_t > *max) > +{ > + uint32_t current_dummy, max_dummy; > + int error; > + > + if (!current) { > + current = ¤t_dummy; > + } > + if (!max) { > + max = &max_dummy; > + } > + > + error = netdev->netdev_class->get_speed > + ? netdev->netdev_class->get_speed(netdev, current, max) > + : EOPNOTSUPP; Please, indent ? and : to the level where condition starts, i.e. shift 4 spaces to the rigth. Same goes to other ternary blocks in the patch set. > + > + if (error) { > + *current = *max = 0; > + } > + return error; > +} > + > /* Returns the maximum speed of a network connection that has the NETDEV_F_* > * bits in 'features', in bits per second. If no bits that indicate a speed > * are set in 'features', returns 'default_bps'. */ > diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c > index a405eb056..7a8f259a2 100644 > --- a/ofproto/ofproto-dpif-sflow.c > +++ b/ofproto/ofproto-dpif-sflow.c > @@ -307,6 +307,7 @@ sflow_agent_get_counters(void *ds_, SFLPoller *poller, > enum netdev_flags flags; > struct lacp_member_stats lacp_stats; > const char *ifName; > + uint32_t curr_speed; We don't have a reverse x-mass tree here, but it might be better to not make it worse. > > dsp = dpif_sflow_find_port(ds, u32_to_odp(poller->bridgePort)); > if (!dsp) { > @@ -320,13 +321,18 @@ sflow_agent_get_counters(void *ds_, SFLPoller *poller, > if (!netdev_get_features(dsp->ofport->netdev, ¤t, NULL, NULL, > NULL)) { > /* The values of ifDirection come from MAU MIB (RFC 2668): 0 = > unknown, > 1 = full-duplex, 2 = half-duplex, 3 = in, 4=out */ > - counters->ifSpeed = netdev_features_to_bps(current, 0); > counters->ifDirection = (netdev_features_is_full_duplex(current) > ? 1 : 2); > } else { > - counters->ifSpeed = 100000000; > counters->ifDirection = 0; > } > + > + if (!netdev_get_speed(dsp->ofport->netdev, &curr_speed, NULL)) { > + counters->ifSpeed = curr_speed * 1000000; > + } else { > + counters->ifSpeed = 100000000; > + } > + > if (!netdev_get_flags(dsp->ofport->netdev, &flags) && flags & NETDEV_UP) > { > counters->ifStatus = 1; /* ifAdminStatus up. */ > if (netdev_get_carrier(dsp->ofport->netdev)) { > diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c > index 11cc0c6f6..2df210921 100644 > --- a/ofproto/ofproto.c > +++ b/ofproto/ofproto.c > @@ -2478,6 +2478,7 @@ ofport_open(struct ofproto *ofproto, > { > enum netdev_flags flags; > struct netdev *netdev; > + uint32_t curr_speed, max_speed; > int error; ditto. > > *p_netdev = NULL; > @@ -2514,8 +2515,9 @@ ofport_open(struct ofproto *ofproto, > pp->state = netdev_get_carrier(netdev) ? 0 : OFPUTIL_PS_LINK_DOWN; > netdev_get_features(netdev, &pp->curr, &pp->advertised, > &pp->supported, &pp->peer); > - pp->curr_speed = netdev_features_to_bps(pp->curr, 0) / 1000; > - pp->max_speed = netdev_features_to_bps(pp->supported, 0) / 1000; > + netdev_get_speed(netdev, &curr_speed, &max_speed); > + pp->curr_speed = curr_speed * 1000; > + pp->max_speed = max_speed * 1000; > > *p_netdev = netdev; > return 0; > diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c > index f5dc59ad0..2059f7315 100644 > --- a/vswitchd/bridge.c > +++ b/vswitchd/bridge.c > @@ -1694,11 +1694,12 @@ port_configure_stp(const struct ofproto *ofproto, > struct port *port, > if (config_str) { > port_s->path_cost = strtoul(config_str, NULL, 10); > } else { > - enum netdev_features current; > - unsigned int mbps; > + uint32_t mbps; > > - netdev_get_features(iface->netdev, ¤t, NULL, NULL, NULL); > - mbps = netdev_features_to_bps(current, NETDEV_DEFAULT_BPS) / 1000000; > + netdev_get_speed(iface->netdev, &mbps, NULL); > + if (mbps == 0) { If (!mbps) ? > + mbps = NETDEV_DEFAULT_BPS / 1000000; > + } > port_s->path_cost = stp_convert_speed_to_cost(mbps); > } > > @@ -1777,11 +1778,12 @@ port_configure_rstp(const struct ofproto *ofproto, > struct port *port, > if (config_str) { > port_s->path_cost = strtoul(config_str, NULL, 10); > } else { > - enum netdev_features current; > - unsigned int mbps; > + uint32_t mbps; > > - netdev_get_features(iface->netdev, ¤t, NULL, NULL, NULL); > - mbps = netdev_features_to_bps(current, NETDEV_DEFAULT_BPS) / 1000000; > + netdev_get_speed(iface->netdev, &mbps, NULL); > + if (mbps == 0) { > + mbps = NETDEV_DEFAULT_BPS / 1000000; > + } > port_s->path_cost = rstp_convert_speed_to_cost(mbps); > } > > @@ -2417,6 +2419,7 @@ iface_refresh_netdev_status(struct iface *iface) > const char *link_state; > struct eth_addr mac; > int64_t bps, mtu_64, ifindex64, link_resets; > + uint32_t mbps; > int mtu, error; Same here. > > if (iface_is_synthetic(iface)) { > @@ -2456,14 +2459,19 @@ iface_refresh_netdev_status(struct iface *iface) > ovsrec_interface_set_link_resets(iface->cfg, &link_resets, 1); > > error = netdev_get_features(iface->netdev, ¤t, NULL, NULL, NULL); > - bps = !error ? netdev_features_to_bps(current, 0) : 0; > - if (bps) { > + if (!error) { > ovsrec_interface_set_duplex(iface->cfg, > netdev_features_is_full_duplex(current) > ? "full" : "half"); > - ovsrec_interface_set_link_speed(iface->cfg, &bps, 1); > } else { > ovsrec_interface_set_duplex(iface->cfg, NULL); > + } > + > + error = netdev_get_speed(iface->netdev, &mbps, NULL); > + if (!error) { Should we check for error or for mbps being zero? > + bps = (uint64_t) mbps * 1000000; > + ovsrec_interface_set_link_speed(iface->cfg, &bps, 1); > + } else { > ovsrec_interface_set_link_speed(iface->cfg, NULL, 0); > } > _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
