On 2 Jun 2023, at 16:13, Adrian Moreno wrote:
> Currently, the netdev's speed is being calculated by taking the link's
> feature bits (using netdev_get_features()) and transforming them into
> bps.
>
> This mechanism can be both inaccurate and difficult to maintain, mainly
> because we currently use the feature bits supported by OpenFlow which
> would have to be extended to support all new feature bits of all netdev
> implementations while keeping the OpenFlow API intact.
>
> In order to expose the link speed accurately for all current and future
> hardware, add a new netdev API call that allows the implementations to
> provide the current and maximum link speeds in Mbps.
>
> Internally, the logic to get the maximum supported speed still relies on
> feature bits so it might still get out of sync in the future. However,
> the maximum configurable speed is not used as much as the current speed
> and these feature bits are not exposed through the netdev interface so
> it should be easier to add more.
>
> Use this new function instead of netdev_get_features() where the link
> speed is needed.
>
> As a consecuence of this patch, link speeds of cards is properly
> reported (internally in OVSDB) even if not supported by OpenFlow.
> A test verifies this behavior using a tap device.
>
> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2137567
> Signed-off-by: Adrian Moreno <[email protected]>
No review yet, but when experimenting with compiler options found one issue
below.
> ---
> include/openvswitch/netdev.h | 1 +
> lib/dpif.h | 4 ++-
> lib/netdev-bsd.c | 21 +++++++++++++++
> lib/netdev-dpdk.c | 52 ++++++++++++++++++++++++++++++++++++
> lib/netdev-linux-private.h | 1 +
> lib/netdev-linux.c | 46 ++++++++++++++++++++++++-------
> lib/netdev-provider.h | 9 +++++++
> lib/netdev.c | 23 ++++++++++++++++
> ofproto/ofproto-dpif-sflow.c | 11 ++++++--
> ofproto/ofproto.c | 6 +++--
> tests/atlocal.in | 3 +++
> tests/system-interface.at | 30 +++++++++++++++++++++
> vswitchd/bridge.c | 30 +++++++++++++--------
> 13 files changed, 212 insertions(+), 25 deletions(-)
>
> diff --git a/include/openvswitch/netdev.h b/include/openvswitch/netdev.h
> index cafd6fd7b..83e8633dd 100644
> --- a/include/openvswitch/netdev.h
> +++ b/include/openvswitch/netdev.h
> @@ -132,6 +132,7 @@ int netdev_get_features(const struct netdev *,
> enum netdev_features *advertised,
> enum netdev_features *supported,
> enum netdev_features *peer);
> +int netdev_get_speed(const struct netdev *, uint32_t *current, uint32_t
> *max);
> uint64_t netdev_features_to_bps(enum netdev_features features,
> uint64_t default_bps);
> bool netdev_features_is_full_duplex(enum netdev_features features);
> diff --git a/lib/dpif.h b/lib/dpif.h
> index 129cbf6a1..9e9d0aa1b 100644
> --- a/lib/dpif.h
> +++ b/lib/dpif.h
> @@ -91,7 +91,9 @@
> *
> * - Carrier status (netdev_get_carrier()).
> *
> - * - Speed (netdev_get_features()).
> + * - Link features (netdev_get_features()).
> + *
> + * - Speed (netdev_get_speed()).
> *
> * - QoS queue configuration (netdev_get_queue(), netdev_set_queue() and
> * related functions.)
> diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
> index 7875636cc..9b5bf6e11 100644
> --- a/lib/netdev-bsd.c
> +++ b/lib/netdev-bsd.c
> @@ -1168,6 +1168,26 @@ cleanup:
> return error;
> }
>
> +static int
> +netdev_bsd_get_speed(const struct netdev *netdev, uint32_t *current,
> + uint32_t *max)
> +{
> + enum netdev_features f_current, f_supported, f_advertised, f_peer;
> + int error;
> +
> + error = netdev_bsd_get_features(netdev, &f_current, &f_advertised,
> + &f_supported, &f_peer);
> + if (error) {
> + return error;
> + }
> +
> + *current = MIN(UINT32_MAX,
> + netdev_features_to_bps(f_current, 0) / 1000000ULL);
> + *max= MIN(UINT32_MAX, netdev_features_to_bps(f_supported, 0) /
> 1000000ULL);
> +
> + return 0;
> +}
> +
> /*
> * Assigns 'addr' as 'netdev''s IPv4 address and 'mask' as its netmask. If
> * 'addr' is INADDR_ANY, 'netdev''s IPv4 address is cleared. Returns a
> @@ -1493,6 +1513,7 @@ netdev_bsd_update_flags(struct netdev *netdev_, enum
> netdev_flags off,
> .get_carrier = netdev_bsd_get_carrier, \
> .get_stats = netdev_bsd_get_stats, \
> .get_features = netdev_bsd_get_features, \
> + .get_speed = netdev_bsd_get_speed, \
> .set_in4 = netdev_bsd_set_in4, \
> .get_addr_list = netdev_bsd_get_addr_list, \
> .get_next_hop = netdev_bsd_get_next_hop, \
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 6bf672d43..7baac7ad7 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -3537,6 +3537,57 @@ netdev_dpdk_get_features(const struct netdev *netdev,
> return 0;
> }
>
> +static int
> +netdev_dpdk_get_speed(const struct netdev *netdev, uint32_t *current,
> + uint32_t *max)
> +{
> + struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> + struct rte_eth_dev_info dev_info;
> + struct rte_eth_link link;
> +
> + ovs_mutex_lock(&dev->mutex);
> + link = dev->link;
> + rte_eth_dev_info_get(dev->port_id, &dev_info);
> + ovs_mutex_unlock(&dev->mutex);
> +
> + *current = link.link_speed != RTE_ETH_SPEED_NUM_UNKNOWN
> + ? link.link_speed : 0;
> +
> + if (dev_info.speed_capa & RTE_ETH_LINK_SPEED_200G) {
> + *max = RTE_ETH_SPEED_NUM_200G;
> + } else if (dev_info.speed_capa & RTE_ETH_LINK_SPEED_100G) {
> + *max = RTE_ETH_SPEED_NUM_100G;
> + } else if (dev_info.speed_capa & RTE_ETH_LINK_SPEED_56G) {
> + *max = RTE_ETH_SPEED_NUM_56G;
> + } else if (dev_info.speed_capa & RTE_ETH_LINK_SPEED_50G) {
> + *max = RTE_ETH_SPEED_NUM_50G;
> + } else if (dev_info.speed_capa & RTE_ETH_LINK_SPEED_40G) {
> + *max = RTE_ETH_SPEED_NUM_40G;
> + } else if (dev_info.speed_capa & RTE_ETH_LINK_SPEED_25G) {
> + *max = RTE_ETH_SPEED_NUM_25G;
> + } else if (dev_info.speed_capa & RTE_ETH_LINK_SPEED_20G) {
> + *max = RTE_ETH_SPEED_NUM_20G;
> + } else if (dev_info.speed_capa & RTE_ETH_LINK_SPEED_10G) {
> + *max = RTE_ETH_SPEED_NUM_10G;
> + } else if (dev_info.speed_capa & RTE_ETH_LINK_SPEED_5G) {
> + *max = RTE_ETH_SPEED_NUM_5G;
> + } else if (dev_info.speed_capa & RTE_ETH_LINK_SPEED_2_5G) {
> + *max = RTE_ETH_SPEED_NUM_2_5G;
> + } else if (dev_info.speed_capa & RTE_ETH_LINK_SPEED_1G) {
> + *max = RTE_ETH_SPEED_NUM_1G;
> + } else if (dev_info.speed_capa & RTE_ETH_LINK_SPEED_100M ||
> + dev_info.speed_capa & RTE_ETH_LINK_SPEED_100M_HD) {
> + *max = RTE_ETH_SPEED_NUM_100M;
> + } else if (dev_info.speed_capa & RTE_ETH_LINK_SPEED_10M ||
> + dev_info.speed_capa & RTE_ETH_LINK_SPEED_10M_HD) {
> + *max = RTE_ETH_SPEED_NUM_10M;
> + } else {
> + *max = 0;
> + }
> +
> + return 0;
> +}
> +
> static struct ingress_policer *
> netdev_dpdk_policer_construct(uint32_t rate, uint32_t burst)
> {
> @@ -5841,6 +5892,7 @@ parse_vhost_config(const struct smap *ovs_other_config)
> .get_stats = netdev_dpdk_get_stats, \
> .get_custom_stats = netdev_dpdk_get_custom_stats, \
> .get_features = netdev_dpdk_get_features, \
> + .get_speed = netdev_dpdk_get_speed, \
> .get_status = netdev_dpdk_get_status, \
> .reconfigure = netdev_dpdk_reconfigure, \
> .rxq_recv = netdev_dpdk_rxq_recv
> diff --git a/lib/netdev-linux-private.h b/lib/netdev-linux-private.h
> index deb015bdb..0ecf0f748 100644
> --- a/lib/netdev-linux-private.h
> +++ b/lib/netdev-linux-private.h
> @@ -92,6 +92,7 @@ struct netdev_linux {
> enum netdev_features current; /* Cached from ETHTOOL_GSET. */
> enum netdev_features advertised; /* Cached from ETHTOOL_GSET. */
> enum netdev_features supported; /* Cached from ETHTOOL_GSET. */
> + uint32_t current_speed; /* Cached from ETHTOOL_GSET. */
>
> struct ethtool_drvinfo drvinfo; /* Cached from ETHTOOL_GDRVINFO. */
> struct tc *tc;
> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> index 36620199e..3055f88d2 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -2348,7 +2348,6 @@ static void
> netdev_linux_read_features(struct netdev_linux *netdev)
> {
> struct ethtool_cmd ecmd;
> - uint32_t speed;
> int error;
>
> if (netdev->cache_valid & VALID_FEATURES) {
> @@ -2462,20 +2461,20 @@ netdev_linux_read_features(struct netdev_linux
> *netdev)
> }
>
> /* Current settings. */
> - speed = ethtool_cmd_speed(&ecmd);
> - if (speed == SPEED_10) {
> + netdev->current_speed = ethtool_cmd_speed(&ecmd);
> + if (netdev->current_speed == SPEED_10) {
> netdev->current = ecmd.duplex ? NETDEV_F_10MB_FD : NETDEV_F_10MB_HD;
> - } else if (speed == SPEED_100) {
> + } else if (netdev->current_speed == SPEED_100) {
> netdev->current = ecmd.duplex ? NETDEV_F_100MB_FD :
> NETDEV_F_100MB_HD;
> - } else if (speed == SPEED_1000) {
> + } else if (netdev->current_speed == SPEED_1000) {
> netdev->current = ecmd.duplex ? NETDEV_F_1GB_FD : NETDEV_F_1GB_HD;
> - } else if (speed == SPEED_10000) {
> + } else if (netdev->current_speed == SPEED_10000) {
> netdev->current = NETDEV_F_10GB_FD;
> - } else if (speed == 40000) {
> + } else if (netdev->current_speed == 40000) {
> netdev->current = NETDEV_F_40GB_FD;
> - } else if (speed == 100000) {
> + } else if (netdev->current_speed == 100000) {
> netdev->current = NETDEV_F_100GB_FD;
> - } else if (speed == 1000000) {
> + } else if (netdev->current_speed == 1000000) {
> netdev->current = NETDEV_F_1TB_FD;
> } else {
> netdev->current = 0;
> @@ -2529,6 +2528,33 @@ exit:
> return error;
> }
>
> +static int
> +netdev_linux_get_speed(const struct netdev *netdev_, uint32_t *current,
> + uint32_t *max)
> +{
> + struct netdev_linux *netdev = netdev_linux_cast(netdev_);
> + int error;
> +
> + ovs_mutex_lock(&netdev->mutex);
> + if (netdev_linux_netnsid_is_remote(netdev)) {
> + error = EOPNOTSUPP;
> + goto exit;
> + }
> +
> + netdev_linux_read_features(netdev);
> + if (!netdev->get_features_error) {
> + *current = netdev->current_speed == SPEED_UNKNOWN ? 0
> + : netdev->current_speed;
> + *max = MIN(UINT32_MAX,
> + netdev_features_to_bps(netdev->supported, 0) /
> 1000000ULL);
> + }
> + error = netdev->get_features_error;
> +
> +exit:
> + ovs_mutex_unlock(&netdev->mutex);
> + return error;
> +}
> +
> /* Set the features advertised by 'netdev' to 'advertise'. */
> static int
> netdev_linux_set_advertisements(struct netdev *netdev_,
> @@ -3655,6 +3681,7 @@ const struct netdev_class netdev_linux_class = {
> .destruct = netdev_linux_destruct,
> .get_stats = netdev_linux_get_stats,
> .get_features = netdev_linux_get_features,
> + .get_speed = netdev_linux_get_speed,
> .get_status = netdev_linux_get_status,
> .get_block_id = netdev_linux_get_block_id,
> .send = netdev_linux_send,
> @@ -3671,6 +3698,7 @@ const struct netdev_class netdev_tap_class = {
> .destruct = netdev_linux_destruct,
> .get_stats = netdev_tap_get_stats,
> .get_features = netdev_linux_get_features,
> + .get_speed = netdev_linux_get_speed,
> .get_status = netdev_linux_get_status,
> .send = netdev_linux_send,
> .rxq_construct = netdev_linux_rxq_construct,
> diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
> index b5420947d..a7393c7ce 100644
> --- a/lib/netdev-provider.h
> +++ b/lib/netdev-provider.h
> @@ -500,6 +500,15 @@ struct netdev_class {
> enum netdev_features *supported,
> enum netdev_features *peer);
>
> + /* Stores the current and maximum supported link speed by 'netdev' into
> + * each of '*current' and '*max'. Each value represents the speed in
> Mbps.
> + * If any of the speeds is unknown, a zero value must be stored.
> + *
> + * This function may be set to null if it would always return EOPNOTSUPP.
> + */
> + int (*get_speed)(const struct netdev *netdev, uint32_t *current,
> + uint32_t *max);
> +
> /* Set the features advertised by 'netdev' to 'advertise', which is a
> * set of NETDEV_F_* bits.
> *
> diff --git a/lib/netdev.c b/lib/netdev.c
> index c79778378..2fe7d8bad 100644
> --- a/lib/netdev.c
> +++ b/lib/netdev.c
> @@ -1167,6 +1167,29 @@ netdev_get_features(const struct netdev *netdev,
> return error;
> }
>
> +int
> +netdev_get_speed(const struct netdev *netdev, uint32_t *current, uint32_t
> *max)
> +{
> + uint32_t current_dummy, max_dummy;
> + int error;
> +
> + if (!current) {
> + current = ¤t_dummy;
> + }
> + if (!max) {
> + max = &max_dummy;
> + }
> +
> + error = netdev->netdev_class->get_speed
> + ? netdev->netdev_class->get_speed(netdev, current, max)
> + : EOPNOTSUPP;
> +
> + if (error) {
> + *current = *max = 0;
> + }
> + return error;
> +}
> +
> /* Returns the maximum speed of a network connection that has the NETDEV_F_*
> * bits in 'features', in bits per second. If no bits that indicate a speed
> * are set in 'features', returns 'default_bps'. */
> diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c
> index a405eb056..a3c83bac8 100644
> --- a/ofproto/ofproto-dpif-sflow.c
> +++ b/ofproto/ofproto-dpif-sflow.c
> @@ -306,6 +306,7 @@ sflow_agent_get_counters(void *ds_, SFLPoller *poller,
> struct netdev_stats stats;
> enum netdev_flags flags;
> struct lacp_member_stats lacp_stats;
> + uint32_t curr_speed;
> const char *ifName;
>
> dsp = dpif_sflow_find_port(ds, u32_to_odp(poller->bridgePort));
> @@ -320,13 +321,19 @@ sflow_agent_get_counters(void *ds_, SFLPoller *poller,
> if (!netdev_get_features(dsp->ofport->netdev, ¤t, NULL, NULL,
> NULL)) {
> /* The values of ifDirection come from MAU MIB (RFC 2668): 0 =
> unknown,
> 1 = full-duplex, 2 = half-duplex, 3 = in, 4=out */
> - counters->ifSpeed = netdev_features_to_bps(current, 0);
> counters->ifDirection = (netdev_features_is_full_duplex(current)
> ? 1 : 2);
> } else {
> - counters->ifSpeed = 100000000;
> counters->ifDirection = 0;
> }
> +
> + netdev_get_speed(dsp->ofport->netdev, &curr_speed, NULL);
> + if (curr_speed) {
> + counters->ifSpeed = curr_speed * 1000000;
> + } else {
> + counters->ifSpeed = 100000000;
> + }
> +
> if (!netdev_get_flags(dsp->ofport->netdev, &flags) && flags & NETDEV_UP)
> {
> counters->ifStatus = 1; /* ifAdminStatus up. */
> if (netdev_get_carrier(dsp->ofport->netdev)) {
> diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
> index 11cc0c6f6..dbf4958bc 100644
> --- a/ofproto/ofproto.c
> +++ b/ofproto/ofproto.c
> @@ -2476,6 +2476,7 @@ ofport_open(struct ofproto *ofproto,
> struct ofputil_phy_port *pp,
> struct netdev **p_netdev)
> {
> + uint32_t curr_speed, max_speed;
> enum netdev_flags flags;
> struct netdev *netdev;
> int error;
> @@ -2514,8 +2515,9 @@ ofport_open(struct ofproto *ofproto,
> pp->state = netdev_get_carrier(netdev) ? 0 : OFPUTIL_PS_LINK_DOWN;
> netdev_get_features(netdev, &pp->curr, &pp->advertised,
> &pp->supported, &pp->peer);
> - pp->curr_speed = netdev_features_to_bps(pp->curr, 0) / 1000;
> - pp->max_speed = netdev_features_to_bps(pp->supported, 0) / 1000;
> + netdev_get_speed(netdev, &curr_speed, &max_speed);
> + pp->curr_speed = curr_speed * 1000;
> + pp->max_speed = max_speed * 1000;
>
> *p_netdev = netdev;
> return 0;
> diff --git a/tests/atlocal.in b/tests/atlocal.in
> index 859668586..4909c9d08 100644
> --- a/tests/atlocal.in
> +++ b/tests/atlocal.in
> @@ -178,6 +178,9 @@ find_command tcpdump
> # Set HAVE_LFTP
> find_command lftp
>
> +# Set HAVE_ETHTOOL
> +find_command ethtool
> +
> CURL_OPT="-g -v --max-time 1 --retry 2 --retry-delay 1 --connect-timeout 1"
>
> # Determine whether "diff" supports "normal" diffs. (busybox diff does not.)
> diff --git a/tests/system-interface.at b/tests/system-interface.at
> index 3bf339582..148f011c7 100644
> --- a/tests/system-interface.at
> +++ b/tests/system-interface.at
> @@ -122,3 +122,33 @@ AT_CHECK([ip link show | grep " genev_sys_[[0-9]]*: .*
> ovs-system " | diff -u -
>
> OVS_TRAFFIC_VSWITCHD_STOP
> AT_CLEANUP
> +
> +AT_SETUP([interface - current speed])
> +AT_SKIP_IF([test $HAVE_ETHTOOL = "no"])
> +OVS_TRAFFIC_VSWITCHD_START()
> +
> +AT_CHECK([ip tuntap add tap0 mode tap])
> +on_exit 'ip tuntap del tap0 mode tap'
> +
> +AT_CHECK([ip link set dev tap0 address aa:55:aa:55:00:01])
> +AT_CHECK([ethtool -s tap0 speed 50000 duplex full])
> +AT_CHECK([ip link set dev tap0 up])
> +
> +AT_CHECK([ovs-vsctl add-port br0 tap0 -- set int tap0 type=tap])
> +
> +AT_CHECK([ovs-ofctl -O OpenFlow15 -vwarn dump-ports-desc br0 tap0], [0],
> [stdout])
> +AT_CHECK([strip_xids < stdout], [0], [dnl
> +OFPST_PORT_DESC reply (OF1.5):
> + 1(tap0): addr:aa:55:aa:55:00:01
> + config: 0
> + state: LIVE
> + current: COPPER
> + speed: 50000 Mbps now, 0 Mbps max
> +])
> +
> +AT_CHECK([ovs-vsctl get interface tap0 link_speed], [0], [dnl
> +50000000000
> +])
> +
> +OVS_TRAFFIC_VSWITCHD_STOP
> +AT_CLEANUP
> diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
> index f5dc59ad0..55c0b1213 100644
> --- a/vswitchd/bridge.c
> +++ b/vswitchd/bridge.c
> @@ -1694,11 +1694,12 @@ port_configure_stp(const struct ofproto *ofproto,
> struct port *port,
> if (config_str) {
> port_s->path_cost = strtoul(config_str, NULL, 10);
> } else {
> - enum netdev_features current;
> - unsigned int mbps;
> + uint32_t mbps;
>
> - netdev_get_features(iface->netdev, ¤t, NULL, NULL, NULL);
> - mbps = netdev_features_to_bps(current, NETDEV_DEFAULT_BPS) / 1000000;
> + netdev_get_speed(iface->netdev, &mbps, NULL);
> + if (!mbps) {
> + mbps = NETDEV_DEFAULT_BPS / 1000000;
> + }
> port_s->path_cost = stp_convert_speed_to_cost(mbps);
> }
>
> @@ -1777,11 +1778,12 @@ port_configure_rstp(const struct ofproto *ofproto,
> struct port *port,
> if (config_str) {
> port_s->path_cost = strtoul(config_str, NULL, 10);
> } else {
> - enum netdev_features current;
> - unsigned int mbps;
> + uint32_t mbps;
>
> - netdev_get_features(iface->netdev, ¤t, NULL, NULL, NULL);
> - mbps = netdev_features_to_bps(current, NETDEV_DEFAULT_BPS) / 1000000;
> + netdev_get_speed(iface->netdev, &mbps, NULL);
> + if (!mbps) {
> + mbps = NETDEV_DEFAULT_BPS / 1000000;
> + }
> port_s->path_cost = rstp_convert_speed_to_cost(mbps);
> }
>
> @@ -2418,6 +2420,7 @@ iface_refresh_netdev_status(struct iface *iface)
> struct eth_addr mac;
> int64_t bps, mtu_64, ifindex64, link_resets;
> int mtu, error;
> + uint32_t mbps;
>
> if (iface_is_synthetic(iface)) {
> return;
> @@ -2456,14 +2459,19 @@ iface_refresh_netdev_status(struct iface *iface)
> ovsrec_interface_set_link_resets(iface->cfg, &link_resets, 1);
>
> error = netdev_get_features(iface->netdev, ¤t, NULL, NULL, NULL);
> - bps = !error ? netdev_features_to_bps(current, 0) : 0;
> - if (bps) {
> + if (!error) {
> ovsrec_interface_set_duplex(iface->cfg,
> netdev_features_is_full_duplex(current)
> ? "full" : "half");
> - ovsrec_interface_set_link_speed(iface->cfg, &bps, 1);
> } else {
> ovsrec_interface_set_duplex(iface->cfg, NULL);
> + }
> +
> + error = netdev_get_speed(iface->netdev, &mbps, NULL);
No review yet, but when playing with `make clang-analyze` it flagged this as a
new issue; Value stored to 'error' is never read.
> + if (mbps) {
> + bps = mbps * 1000000ULL;
> + ovsrec_interface_set_link_speed(iface->cfg, &bps, 1);
> + } else {
> ovsrec_interface_set_link_speed(iface->cfg, NULL, 0);
> }
>
> --
> 2.40.1
>
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev