On 15 Dec 2025, at 20:33, Ilya Maximets wrote:

> Kernels with AF_XDP support below v5.4 are not supported and are not
> used in any major distributions.  Using kernels that old with AF_XDP
> that was rapidly changing at the time is also not a good idea in
> general.  Let's drop support for them.  This allows to have the
> XDP_USE_NEED_WAKEUP flag to be always on, so we can simplify the code
> and stop guessing how many packets kernel can send in one go.
>
> The next milestone may be to require libxdp 1.2+ and kernel 5.18+, so
> we can simplify acinclude.m4 and avoid checking for bpf_xdp_detach and
> bpf_xdp_query_id API.  But it feels a little bit too early for that,
> as it excludes Ubuntu 22.04, even if it's probably not a great idea
> to run AF_XDP there either.  The gains from moving the pole to 5.18
> are just a couple of checks in the code, so not doing that for now.
>
> Signed-off-by: Ilya Maximets <[email protected]>

Thanks for taking care of this cleanup! The patch looks fine to me. I did a 
quick test on my local machine, and basic forwarding seems to work correctly.

Acked-by: Eelco Chaudron <[email protected]>

> ---
>
> Version 2:
>  * Cleaned up leftover structure fields in netdev-linux-private.h.
>
>  Documentation/intro/install/afxdp.rst |   8 +-
>  NEWS                                  |   5 +
>  acinclude.m4                          |   6 +-
>  lib/netdev-afxdp.c                    | 127 ++++++--------------------
>  lib/netdev-linux-private.h            |   3 -
>  vswitchd/vswitch.xml                  |  12 ---
>  6 files changed, 44 insertions(+), 117 deletions(-)
>
> diff --git a/Documentation/intro/install/afxdp.rst 
> b/Documentation/intro/install/afxdp.rst
> index 7fa8088c6..63a10e328 100644
> --- a/Documentation/intro/install/afxdp.rst
> +++ b/Documentation/intro/install/afxdp.rst
> @@ -89,6 +89,9 @@ vSwitch with AF_XDP will require the following:
>
>  - ``libbpf`` and ``libxdp`` (if version of ``libbpf`` if higher than 
> ``0.6``).
>
> +- Linux kernel v5.4+ or a nominally older distribution kernel that has
> +  required features backported.
> +
>  - Linux kernel XDP support, with the following options (required)
>
>    * CONFIG_BPF=y
> @@ -145,7 +148,7 @@ Finally, build and install OVS::
>
>  To kick start end-to-end autotesting::
>
> -  uname -a # make sure having 5.0+ kernel
> +  uname -a # make sure having 5.4+ kernel
>    ethtool --version # make sure ethtool is installed
>    make check-afxdp TESTSUITEFLAGS='1'
>
> @@ -180,9 +183,6 @@ more details):
>     ``native`` or ``generic``.  Defaults to ``best-effort``, i.e. best of
>     supported modes, so in most cases you don't need to change it.
>
> - * ``use-need-wakeup``: default ``true`` if libbpf supports it,
> -   otherwise ``false``.
> -
>  For example, to use 1 PMD (on core 4) on 1 queue (queue 0) device,
>  configure these options: ``pmd-cpu-mask``, ``pmd-rxq-affinity``, and
>  ``n_rxq``::
> diff --git a/NEWS b/NEWS
> index f9a74df1a..68c1b3eea 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -1,5 +1,10 @@
>  Post-v3.6.0
>  --------------------
> +   - AF_XDP:
> +     * AF_XDP support now requires Linux kernel v5.4+ or a nominally older
> +       distribution kernel with sufficient features backported.
> +     * Setting or reporting "use-need-wakeup" is no longer supported.  The
> +       feature is always enabled.
>     - TLS:
>       * Added support for TLS Server Name Indication (SNI) with the new
>         --ssl-server-name option.  This allows specifying the server name
> diff --git a/acinclude.m4 b/acinclude.m4
> index 369e37eae..bbc872142 100644
> --- a/acinclude.m4
> +++ b/acinclude.m4
> @@ -310,7 +310,11 @@ AC_DEFUN([OVS_CHECK_LINUX_AF_XDP], [
>      AC_CHECK_HEADER([bpf/libbpf.h], [], [failed_dep="bpf/libbpf.h"])
>
>      if test "$failed_dep" = none; then
> -      AC_CHECK_HEADER([linux/if_xdp.h], [], [failed_dep="linux/if_xdp.h"])
> +      AC_CHECK_HEADER([linux/if_xdp.h], [
> +        AC_CHECK_DECLS([XDP_USE_NEED_WAKEUP], [],
> +                       [failed_dep="XDP_USE_NEED_WAKEUP"],
> +                       [[#include <linux/if_xdp.h>]])
> +      ], [failed_dep="linux/if_xdp.h"])
>      fi
>
>      if test "$failed_dep" = none; then
> diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
> index 54029722e..09f4c5107 100644
> --- a/lib/netdev-afxdp.c
> +++ b/lib/netdev-afxdp.c
> @@ -79,12 +79,6 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 
> 20);
>  #define PROD_NUM_DESCS      XSK_RING_PROD__DEFAULT_NUM_DESCS
>  #define CONS_NUM_DESCS      XSK_RING_CONS__DEFAULT_NUM_DESCS
>
> -#ifdef XDP_USE_NEED_WAKEUP
> -#define NEED_WAKEUP_DEFAULT true
> -#else
> -#define NEED_WAKEUP_DEFAULT false
> -#endif
> -
>  /* The worst case is all 4 queues TX/CQ/RX/FILL are full + some packets
>   * still on processing in threads. Number of packets currently in OVS
>   * processing is hard to estimate because it depends on number of ports.
> @@ -101,7 +95,6 @@ BUILD_ASSERT_DECL(PROD_NUM_DESCS == CONS_NUM_DESCS);
>
>  static struct xsk_socket_info *xsk_configure(int ifindex, int xdp_queue_id,
>                                               enum afxdp_mode mode,
> -                                             bool use_need_wakeup,
>                                               bool report_socket_failures);
>  static void xsk_remove_xdp_program(uint32_t ifindex, enum afxdp_mode);
>  static void xsk_destroy(struct xsk_socket_info *xsk);
> @@ -176,19 +169,13 @@ struct netdev_afxdp_tx_lock {
>      );
>  };
>
> -#ifdef XDP_USE_NEED_WAKEUP
>  static inline void
>  xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem,
>                          struct netdev *netdev, int fd)
>  {
> -    struct netdev_linux *dev = netdev_linux_cast(netdev);
>      struct pollfd pfd;
>      int ret;
>
> -    if (!dev->use_need_wakeup) {
> -        return;
> -    }
> -
>      if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
>          pfd.fd = fd;
>          pfd.events = POLLIN;
> @@ -208,22 +195,6 @@ xsk_tx_need_wakeup(struct xsk_socket_info *xsk_info)
>      return xsk_ring_prod__needs_wakeup(&xsk_info->tx);
>  }
>
> -#else /* !XDP_USE_NEED_WAKEUP */
> -static inline void
> -xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem OVS_UNUSED,
> -                        struct netdev *netdev OVS_UNUSED,
> -                        int fd OVS_UNUSED)
> -{
> -    /* Nothing. */
> -}
> -
> -static inline bool
> -xsk_tx_need_wakeup(struct xsk_socket_info *xsk_info OVS_UNUSED)
> -{
> -    return true;
> -}
> -#endif /* XDP_USE_NEED_WAKEUP */
> -
>  static void
>  netdev_afxdp_cleanup_unused_pool(struct unused_pool *pool)
>  {
> @@ -341,7 +312,7 @@ xsk_configure_umem(void *buffer, uint64_t size)
>  static struct xsk_socket_info *
>  xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
>                       uint32_t queue_id, enum afxdp_mode mode,
> -                     bool use_need_wakeup, bool report_socket_failures)
> +                     bool report_socket_failures)
>  {
>      struct xsk_socket_config cfg;
>      struct xsk_socket_info *xsk;
> @@ -355,15 +326,9 @@ xsk_configure_socket(struct xsk_umem_info *umem, 
> uint32_t ifindex,
>      cfg.rx_size = CONS_NUM_DESCS;
>      cfg.tx_size = PROD_NUM_DESCS;
>      cfg.libbpf_flags = 0;
> -    cfg.bind_flags = xdp_modes[mode].bind_flags;
> +    cfg.bind_flags = xdp_modes[mode].bind_flags | XDP_USE_NEED_WAKEUP;
>      cfg.xdp_flags = xdp_modes[mode].xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST;
>
> -#ifdef XDP_USE_NEED_WAKEUP
> -    if (use_need_wakeup) {
> -        cfg.bind_flags |= XDP_USE_NEED_WAKEUP;
> -    }
> -#endif
> -
>      if (if_indextoname(ifindex, devname) == NULL) {
>          VLOG_ERR("ifindex %d to devname failed (%s)",
>                   ifindex, ovs_strerror(errno));
> @@ -375,10 +340,8 @@ xsk_configure_socket(struct xsk_umem_info *umem, 
> uint32_t ifindex,
>                               &xsk->rx, &xsk->tx, &cfg);
>      if (ret) {
>          VLOG(report_socket_failures ? VLL_ERR : VLL_DBG,
> -             "xsk_socket__create failed (%s) mode: %s, "
> -             "use-need-wakeup: %s, qid: %d",
> -             ovs_strerror(errno), xdp_modes[mode].name,
> -             use_need_wakeup ? "true" : "false", queue_id);
> +             "xsk_socket__create failed (%s) mode: %s, qid: %d",
> +             ovs_strerror(errno), xdp_modes[mode].name, queue_id);
>          free(xsk);
>          return NULL;
>      }
> @@ -424,7 +387,7 @@ xsk_configure_socket(struct xsk_umem_info *umem, uint32_t 
> ifindex,
>
>  static struct xsk_socket_info *
>  xsk_configure(int ifindex, int xdp_queue_id, enum afxdp_mode mode,
> -              bool use_need_wakeup, bool report_socket_failures)
> +              bool report_socket_failures)
>  {
>      struct xsk_socket_info *xsk;
>      struct xsk_umem_info *umem;
> @@ -450,7 +413,7 @@ xsk_configure(int ifindex, int xdp_queue_id, enum 
> afxdp_mode mode,
>      VLOG_DBG("Allocated umem pool at 0x%"PRIxPTR, (uintptr_t) umem);
>
>      xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, mode,
> -                               use_need_wakeup, report_socket_failures);
> +                               report_socket_failures);
>      if (!xsk) {
>          /* Clean up umem and xpacket pool. */
>          if (xsk_umem__delete(umem->umem)) {
> @@ -470,11 +433,10 @@ xsk_configure_queue(struct netdev_linux *dev, int 
> ifindex, int queue_id,
>  {
>      struct xsk_socket_info *xsk_info;
>
> -    VLOG_DBG("%s: configuring queue: %d, mode: %s, use-need-wakeup: %s.",
> -             netdev_get_name(&dev->up), queue_id, xdp_modes[mode].name,
> -             dev->use_need_wakeup ? "true" : "false");
> -    xsk_info = xsk_configure(ifindex, queue_id, mode, dev->use_need_wakeup,
> -                             report_socket_failures);
> +    VLOG_DBG("%s: configuring queue: %d, mode: %s.",
> +             netdev_get_name(&dev->up), queue_id, xdp_modes[mode].name);
> +
> +    xsk_info = xsk_configure(ifindex, queue_id, mode, 
> report_socket_failures);
>      if (!xsk_info) {
>          VLOG(report_socket_failures ? VLL_ERR : VLL_DBG,
>               "%s: Failed to create AF_XDP socket on queue %d in %s mode.",
> @@ -617,7 +579,6 @@ netdev_afxdp_set_config(struct netdev *netdev, const 
> struct smap *args,
>      struct netdev_linux *dev = netdev_linux_cast(netdev);
>      const char *str_xdp_mode;
>      enum afxdp_mode xdp_mode;
> -    bool need_wakeup;
>      int new_n_rxq;
>
>      ovs_mutex_lock(&dev->mutex);
> @@ -644,20 +605,10 @@ netdev_afxdp_set_config(struct netdev *netdev, const 
> struct smap *args,
>          return EINVAL;
>      }
>
> -    need_wakeup = smap_get_bool(args, "use-need-wakeup", 
> NEED_WAKEUP_DEFAULT);
> -#ifndef XDP_USE_NEED_WAKEUP
> -    if (need_wakeup) {
> -        VLOG_WARN("XDP need_wakeup is not supported in libbpf/libxdp.");
> -        need_wakeup = false;
> -    }
> -#endif
> -
>      if (dev->requested_n_rxq != new_n_rxq
> -        || dev->requested_xdp_mode != xdp_mode
> -        || dev->requested_need_wakeup != need_wakeup) {
> +        || dev->requested_xdp_mode != xdp_mode) {
>          dev->requested_n_rxq = new_n_rxq;
>          dev->requested_xdp_mode = xdp_mode;
> -        dev->requested_need_wakeup = need_wakeup;
>          netdev_request_reconfigure(netdev);
>      }
>      ovs_mutex_unlock(&dev->mutex);
> @@ -672,8 +623,6 @@ netdev_afxdp_get_config(const struct netdev *netdev, 
> struct smap *args)
>      ovs_mutex_lock(&dev->mutex);
>      smap_add_format(args, "n_rxq", "%d", netdev->n_rxq);
>      smap_add_format(args, "xdp-mode", "%s", xdp_modes[dev->xdp_mode].name);
> -    smap_add_format(args, "use-need-wakeup", "%s",
> -                    dev->use_need_wakeup ? "true" : "false");
>      ovs_mutex_unlock(&dev->mutex);
>      return 0;
>  }
> @@ -708,7 +657,6 @@ netdev_afxdp_reconfigure(struct netdev *netdev)
>
>      if (netdev->n_rxq == dev->requested_n_rxq
>          && dev->xdp_mode == dev->requested_xdp_mode
> -        && dev->use_need_wakeup == dev->requested_need_wakeup
>          && dev->xsks) {
>          goto out;
>      }
> @@ -725,7 +673,6 @@ netdev_afxdp_reconfigure(struct netdev *netdev)
>      if (setrlimit(RLIMIT_MEMLOCK, &r)) {
>          VLOG_ERR("setrlimit(RLIMIT_MEMLOCK) failed: %s", 
> ovs_strerror(errno));
>      }
> -    dev->use_need_wakeup = dev->requested_need_wakeup;
>
>      err = xsk_configure_all(netdev);
>      if (err) {
> @@ -922,39 +869,26 @@ netdev_afxdp_rxq_recv(struct netdev_rxq *rxq_, struct 
> dp_packet_batch *batch,
>  }
>
>  static inline int
> -kick_tx(struct xsk_socket_info *xsk_info, enum afxdp_mode mode,
> -        bool use_need_wakeup)
> +kick_tx(struct xsk_socket_info *xsk_info)
>  {
> -    int ret, retries;
> -    static const int KERNEL_TX_BATCH_SIZE = 16;
> -
> -    if (use_need_wakeup && !xsk_tx_need_wakeup(xsk_info)) {
> -        return 0;
> -    }
> -
> -    /* In all modes except native-with-zerocopy packet transmission is
> -     * synchronous, and the kernel xmits only TX_BATCH_SIZE(16) packets for a
> -     * single sendmsg syscall.
> -     * So, we have to kick the kernel (n_packets / 16) times to be sure that
> -     * all packets are transmitted. */
> -    retries = (mode != OVS_AF_XDP_MODE_NATIVE_ZC)
> -              ? xsk_info->outstanding_tx / KERNEL_TX_BATCH_SIZE
> -              : 0;
> -kick_retry:
> -    /* This causes system call into kernel's xsk_sendmsg, and 
> xsk_generic_xmit
> -     * (generic and native modes) or xsk_zc_xmit (native-with-zerocopy mode).
> -     */
> -    ret = sendto(xsk_socket__fd(xsk_info->xsk), NULL, 0, MSG_DONTWAIT,
> -                                NULL, 0);
> -    if (ret < 0) {
> -        if (retries-- && errno == EAGAIN) {
> -            goto kick_retry;
> -        }
> -        if (errno == ENXIO || errno == ENOBUFS || errno == EOPNOTSUPP) {
> -            return errno;
> +    /* At most try as many times as the number of packets we have to send. */
> +    uint32_t max_retry = xsk_info->outstanding_tx;
> +
> +    while (max_retry-- && xsk_tx_need_wakeup(xsk_info)) {
> +        int fd = xsk_socket__fd(xsk_info->xsk);
> +        int ret;
> +
> +        /* This causes system call into kernel's xsk_sendmsg, and
> +         * xsk_generic_xmit (generic and native modes) or xsk_wakeup
> +         * (native-with-zerocopy mode). */
> +        ret = sendto(fd, NULL, 0, MSG_DONTWAIT, NULL, 0);
> +        if (ret < 0 && errno != EAGAIN) {
> +            /* It's possible that the device is busy, but we shouldn't
> +             * alert users, as that can be too much noise. */
> +            return errno == EBUSY ? 0 : errno;
>          }
>      }
> -    /* No error, or EBUSY, or too many retries on EAGAIN. */
> +    /* No error or too many retries on EAGAIN. */
>      return 0;
>  }
>
> @@ -1104,7 +1038,7 @@ __netdev_afxdp_batch_send(struct netdev *netdev, int 
> qid,
>                             &orig);
>          COVERAGE_INC(afxdp_tx_full);
>          afxdp_complete_tx(xsk_info);
> -        kick_tx(xsk_info, dev->xdp_mode_in_use, dev->use_need_wakeup);
> +        kick_tx(xsk_info);
>          error = ENOMEM;
>          goto out;
>      }
> @@ -1128,7 +1062,7 @@ __netdev_afxdp_batch_send(struct netdev *netdev, int 
> qid,
>      xsk_ring_prod__submit(&xsk_info->tx, dp_packet_batch_size(batch));
>      xsk_info->outstanding_tx += dp_packet_batch_size(batch);
>
> -    ret = kick_tx(xsk_info, dev->xdp_mode_in_use, dev->use_need_wakeup);
> +    ret = kick_tx(xsk_info);
>      if (OVS_UNLIKELY(ret)) {
>          VLOG_WARN_RL(&rl, "%s: error sending AF_XDP packet: %s.",
>                       netdev_get_name(netdev), ovs_strerror(ret));
> @@ -1219,7 +1153,6 @@ netdev_afxdp_construct(struct netdev *netdev)
>
>      dev->requested_n_rxq = NR_QUEUE;
>      dev->requested_xdp_mode = OVS_AF_XDP_MODE_BEST_EFFORT;
> -    dev->requested_need_wakeup = NEED_WAKEUP_DEFAULT;
>
>      dev->xsks = NULL;
>      dev->tx_locks = NULL;
> diff --git a/lib/netdev-linux-private.h b/lib/netdev-linux-private.h
> index 4b5b60675..cf4a021d3 100644
> --- a/lib/netdev-linux-private.h
> +++ b/lib/netdev-linux-private.h
> @@ -119,9 +119,6 @@ struct netdev_linux {
>      enum afxdp_mode requested_xdp_mode;     /* Requested  AF_XDP mode. */
>      enum afxdp_mode xdp_mode_in_use;        /* Effective  AF_XDP mode. */
>
> -    bool use_need_wakeup;
> -    bool requested_need_wakeup;
> -
>      struct netdev_afxdp_tx_lock *tx_locks;  /* Array of locks for TX queues. 
> */
>  #endif
>  };
> diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
> index 3dcf74402..b93ad9344 100644
> --- a/vswitchd/vswitch.xml
> +++ b/vswitchd/vswitch.xml
> @@ -3446,18 +3446,6 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 
> type=patch options:peer=p1 \
>          </p>
>        </column>
>
> -      <column name="options" key="use-need-wakeup"
> -              type='{"type": "boolean"}'>
> -        <p>
> -          Specifies whether to use need_wakeup feature in afxdp netdev.
> -          If enabled, OVS explicitly wakes up the kernel RX, using poll()
> -          syscall and wakes up TX, using sendto() syscall. For physical
> -          devices, this feature improves the performance by avoiding
> -          unnecessary sendto syscalls.
> -          Defaults to true if supported by libbpf.
> -        </p>
> -      </column>
> -
>        <column name="options" key="vhost-server-path"
>                type='{"type": "string"}'>
>          <p>
> -- 
> 2.52.0

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to