On Thu, Feb 12, 2026 at 07:03:01AM +0000, Mingjin Ye wrote:
> Add the `link_status_poll_ms` devargs parameter (in milliseconds).
> If greater than zero, ICE PF uses an alarm handler to poll link
> status periodically.
>
> Signed-off-by: Mingjin Ye <[email protected]>
Hi Mingjin,
thanks for this. See review comments inline. Nothing major, but they do
need to be fixed before this change gets merged.
/Bruce
> ---
> doc/guides/nics/ice.rst | 7 +++
> drivers/net/intel/ice/ice_ethdev.c | 95 ++++++++++++++++++++++++++++--
> drivers/net/intel/ice/ice_ethdev.h | 1 +
> 3 files changed, 99 insertions(+), 4 deletions(-)
>
> diff --git a/doc/guides/nics/ice.rst b/doc/guides/nics/ice.rst
> index 216c79b6f2..0eded96826 100644
> --- a/doc/guides/nics/ice.rst
> +++ b/doc/guides/nics/ice.rst
> @@ -382,6 +382,13 @@ Runtime Configuration
> * ``segment``: Check number of mbuf segments does not exceed HW limits.
> * ``offload``: Check for use of an unsupported offload flag.
>
> +- ``Link status poll`` (default ``not enabled``)
> +
> + Link status polling can be enabled by setting the ``devargs``
> + parameter ``link_status_poll_ms`` (in milliseconds). If the value is set
> + to 0 or negative, polling is disabled.
> + For example ``-a 81:00.0,link_status_poll_ms=50`` or ``-a
> 81:00.0,link_status_poll_ms=0``.
> +
> Driver compilation and testing
> ------------------------------
>
> diff --git a/drivers/net/intel/ice/ice_ethdev.c
> b/drivers/net/intel/ice/ice_ethdev.c
> index ade13600de..ae891caca5 100644
> --- a/drivers/net/intel/ice/ice_ethdev.c
> +++ b/drivers/net/intel/ice/ice_ethdev.c
> @@ -15,6 +15,7 @@
>
> #include <rte_tailq.h>
> #include <rte_os_shim.h>
> +#include <rte_alarm.h>
>
> #include "eal_firmware.h"
>
> @@ -43,6 +44,7 @@
> #define ICE_TM_LEVELS_ARG "tm_sched_levels"
> #define ICE_SOURCE_PRUNE_ARG "source-prune"
> #define ICE_LINK_STATE_ON_CLOSE "link_state_on_close"
> +#define ICE_LINK_STATE_POLL_MS_ARG "link_status_poll_ms"
>
> #define ICE_CYCLECOUNTER_MASK 0xffffffffffffffffULL
>
> @@ -61,6 +63,7 @@ static const char * const ice_valid_args[] = {
> ICE_TM_LEVELS_ARG,
> ICE_SOURCE_PRUNE_ARG,
> ICE_LINK_STATE_ON_CLOSE,
> + ICE_LINK_STATE_POLL_MS_ARG,
> NULL
> };
>
> @@ -1482,6 +1485,38 @@ ice_handle_aq_msg(struct rte_eth_dev *dev)
> rte_free(event.msg_buf);
> }
> #endif
> +static void
> +ice_link_once_update(void *cb_arg)
> +{
> + int ret;
> + struct timespec sys_time;
> + struct rte_eth_dev *dev = cb_arg;
> +
> + ret = ice_link_update(dev, 0);
> + if (!ret) {
> + clock_gettime(CLOCK_REALTIME, &sys_time);
> + PMD_DRV_LOG(INFO, "Current SYS Time: %.24s %.9ld ns",
> + ctime(&sys_time.tv_sec), sys_time.tv_nsec);
> +
What is the purpose of this log message? It just reports the time without
giving any extra info. Can we skip it, or else expand it to give meaningful
information, as well as reducing it to DEBUG level?
> + rte_eth_dev_callback_process
> + (dev, RTE_ETH_EVENT_INTR_LSC, NULL);
> + }
> +}
> +
> +static void
> +ice_link_cycle_update(void *cb_arg)
> +{
> + struct rte_eth_dev *dev = cb_arg;
> + struct ice_adapter *adapter =
> + ICE_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
> +
> + ice_link_once_update(cb_arg);
> +
> + /* re-alarm link update */
> + if (rte_eal_alarm_set(adapter->devargs.link_status_poll_ms,
> + &ice_link_cycle_update, cb_arg))
> + PMD_DRV_LOG(ERR, "Failed to enable cycle link update");
I'm wondering how best to handle this condition. Logging alone doesn't seem
sufficient.
> +}
>
> /**
> * Interrupt handler triggered by NIC for handling
> @@ -1499,6 +1534,8 @@ static void
> ice_interrupt_handler(void *param)
> {
> struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
> + struct ice_adapter *adapter =
> + ICE_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
> struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> uint32_t oicr;
> uint32_t reg;
> @@ -1533,10 +1570,19 @@ ice_interrupt_handler(void *param)
> #else
> if (oicr & PFINT_OICR_LINK_STAT_CHANGE_M) {
> PMD_DRV_LOG(INFO, "OICR: link state change event");
> - ret = ice_link_update(dev, 0);
> - if (!ret)
> - rte_eth_dev_callback_process
> - (dev, RTE_ETH_EVENT_INTR_LSC, NULL);
> + if (adapter->devargs.link_status_poll_ms <= 0) {
> + ret = ice_link_update(dev, 0);
> + if (!ret)
According to DPDK coding style, this should be "if (ret == 0)". Since the
line is being changed, might as well fix it.
> + rte_eth_dev_callback_process
> + (dev, RTE_ETH_EVENT_INTR_LSC, NULL);
> + } else {
> + /* With link status polling enabled, "link_status"
> + * updates are confined to the alarm thread to
> + * avoid race conditions.
> + */
> + if (rte_eal_alarm_set(1, &ice_link_once_update, dev))
> + PMD_DRV_LOG(ERR, "Failed to enable link
> update");
If link status polling is being used, then do we need to schedule this?
Certainly if the current status is down, we are probably ok to leave the
link up notification until next scheduled poll so that the link has time to
stabilize.
However, there is another case we might want to look at here. If we
previously had a failure to reschedule the alarm timer i.e. the case above
there rte_eal_alarm_set failed, this would be a good place to rectify that
by trying to reschedule the update. More specifically, rather than assuming
that if link_status_poll_ms > 0 then we are polling, it would be better to
have a flag that is set to indicate that a poll is scheduled. On start,
when the alarm is set, we set the flag to true, but clear it to zero if at
any point the alarm setting fails. We can then do recovery here, AND also
do an explicit link update if no alarm is scheduled.
For example, instead of "if (adapter->devargs.link_status_poll_ms <= 0)",
change our conditions to:
if (!adapter->lsc_polling) {
ret = ice_link_update(...); /* do manual update */
if (adapter->devargs.link_status_poll_ms > 0) {
/* we had an alarm set failure, so set new */
rte_eal_alarm_set(...)
...
}
} else {
/* we have a scheduled alarm */
/* do we need to do anything here,
* or explicilty do one delayed update?
*/
...
}
> + }
> }
> #endif
>
> @@ -2381,6 +2427,25 @@ ice_parse_mbuf_check(__rte_unused const char *key,
> const char *value, void *args
> return ret;
> }
>
> +static int
> +ice_parse_link_status_poll_ms(__rte_unused const char *key,
> + const char *value, void *args)
> +{
> + int *num = (int *)args;
nit: no need for cast, void * can be assigned to any other pointer type
> + int tmp;
> +
> + if (value == NULL || args == NULL)
> + return -EINVAL;
> +
> + tmp = strtoul(value, NULL, 10);
> + if (errno == EINVAL || errno == ERANGE)
> + return -1;
For safety, I think you should set errno explicitly == 0 before calling
strtoul.
> +
> + *num = tmp * 1000;
> +
This is functionally correct in that it converts the ms value to
microseconds for use with the alarm callback. However, for future
developers, this is terrible! The variables used through all have "ms" in
the suffix, which means that you are telling future readers of the code
that you as passing ms values around when in fact you are passing us ones!
Two possible fixes. Either:
1. rename the internal variables to use "_us" rather than "_ms" in the name
2. remove this and do the multiplication when calling alarm_set each time.
I'd tend towards option #2 here, because if we do store a _us value - what
does the function get called - it parses a ms value, but returns a us one?
> + return 0;
> +}
> +
> static int ice_parse_devargs(struct rte_eth_dev *dev)
> {
> struct ice_adapter *ad =
> @@ -2469,6 +2534,12 @@ static int ice_parse_devargs(struct rte_eth_dev *dev)
>
> ret = rte_kvargs_process(kvlist, ICE_LINK_STATE_ON_CLOSE,
> &parse_link_state_on_close,
> &ad->devargs.link_state_on_close);
> + if (ret)
> + goto bail;
> +
> + ret = rte_kvargs_process(kvlist, ICE_LINK_STATE_POLL_MS_ARG,
> + &ice_parse_link_status_poll_ms,
> + &ad->devargs.link_status_poll_ms);
>
> bail:
> rte_kvargs_free(kvlist);
> @@ -2902,6 +2973,9 @@ ice_dev_stop(struct rte_eth_dev *dev)
> if (pf->adapter_stopped)
> return 0;
>
> + if (ad->devargs.link_status_poll_ms > 0)
> + rte_eal_alarm_cancel(&ice_link_cycle_update, (void *)dev);
> +
> /* stop and clear all Rx queues */
> for (i = 0; i < data->nb_rx_queues; i++)
> ice_rx_queue_stop(dev, i);
> @@ -4398,6 +4472,8 @@ ice_dev_start(struct rte_eth_dev *dev)
> struct rte_eth_dev_data *data = dev->data;
> struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> struct ice_pf *pf = ICE_DEV_PRIVATE_TO_PF(dev->data->dev_private);
> + struct rte_pci_device *pci_dev = ICE_DEV_TO_PCI(dev);
> + struct rte_intr_handle *intr_handle = pci_dev->intr_handle;
> struct ice_vsi *vsi = pf->main_vsi;
> struct ice_adapter *ad =
> ICE_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
> @@ -4474,8 +4550,19 @@ ice_dev_start(struct rte_eth_dev *dev)
>
> ice_dev_set_link_up(dev);
>
> + /* Disable interrupts to avoid race on link status between callback and
> main thread. */
> + rte_intr_disable(intr_handle);
> /* Call get_link_info aq command to enable/disable LSE */
> ice_link_update(dev, 1);
> + rte_intr_enable(intr_handle);
> +
> + if (ad->devargs.link_status_poll_ms > 0) {
> + if (rte_eal_alarm_set(ad->devargs.link_status_poll_ms,
> + &ice_link_cycle_update, (void *)dev) <
> 0) {
> + PMD_DRV_LOG(ERR, "Failed to enable cycle link update");
> + goto rx_err;
> + }
> + }
>
> pf->adapter_stopped = false;
>
> diff --git a/drivers/net/intel/ice/ice_ethdev.h
> b/drivers/net/intel/ice/ice_ethdev.h
> index 9562b2b8f1..781fa3d6ae 100644
> --- a/drivers/net/intel/ice/ice_ethdev.h
> +++ b/drivers/net/intel/ice/ice_ethdev.h
> @@ -631,6 +631,7 @@ struct ice_devargs {
> char xtr_field_name[RTE_MBUF_DYN_NAMESIZE];
> uint64_t mbuf_check;
> const char *ddp_filename;
> + uint32_t link_status_poll_ms;
> };
>
> /**
> --
> 2.25.1
>