On Tue, 24 Feb 2026 at 12:06, Kevin Traynor <[email protected]> wrote:
> > ---
> >  lib/dp-packet.c      | 17 +++++++++++-
> >  lib/netdev-dpdk.c    | 50 +++++++++++++++++++++++++++---------
> >  lib/netdev-dpdk.h    |  3 +++
> >  tests/system-dpdk.at | 61 ++++++++++++++++++++++++++++++++++++++++++++
> >  4 files changed, 118 insertions(+), 13 deletions(-)
> >
> > diff --git a/lib/dp-packet.c b/lib/dp-packet.c
> > index b34bcf26f3..c1ff299a75 100644
> > --- a/lib/dp-packet.c
> > +++ b/lib/dp-packet.c
> > @@ -255,8 +255,23 @@ dp_packet_resize(struct dp_packet *b, size_t 
> > new_headroom, size_t new_tailroom)
> >      new_allocated = new_headroom + dp_packet_size(b) + new_tailroom;
> >
> >      switch (b->source) {
> > -    case DPBUF_DPDK:
> > +    case DPBUF_DPDK: {
> > +#ifdef DPDK_NETDEV
> > +        uint32_t buf_len;
> > +
> > +        buf_len = new_allocated;
> > +        new_base = netdev_dpdk_extbuf_allocate(&buf_len);
> > +        if (!new_base) {
> > +            out_of_memory();
>
> Unlikely but it can also be packet too big, so it's not necessarily an
> OOM. related comment in dpdk_pktmbuf_attach_extbuf
>
> > +        }
> > +        ovs_assert(buf_len <= UINT16_MAX);
> > +        dp_packet_copy__(b, new_base, new_headroom, new_tailroom);
> > +        netdev_dpdk_extbuf_replace(b, new_base, buf_len);
> > +        break;
> > +#else
> >          OVS_NOT_REACHED();
> > +#endif
> > +    }
> >
> >      case DPBUF_MALLOC:
> >          if (new_headroom == dp_packet_headroom(b)) {
> > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> > index 687e1196b5..b416c404da 100644
> > --- a/lib/netdev-dpdk.c
> > +++ b/lib/netdev-dpdk.c
> > @@ -3094,41 +3094,67 @@ netdev_dpdk_filter_packet_len(struct netdev_dpdk 
> > *dev, struct rte_mbuf **pkts,
> >      return cnt;
> >  }
> >
> > +void *
> > +netdev_dpdk_extbuf_allocate(uint32_t *data_len)
> > +{
> > +    *data_len += sizeof(struct rte_mbuf_ext_shared_info) + 
> > sizeof(uintptr_t);
> > +    *data_len = RTE_ALIGN_CEIL(*data_len, sizeof(uintptr_t));
> > +    return rte_malloc(NULL, *data_len, RTE_CACHE_LINE_SIZE);
> > +}
> > +
> >  static void
> >  netdev_dpdk_extbuf_free(void *addr OVS_UNUSED, void *opaque)
> >  {
> >      rte_free(opaque);
> >  }
> >
> > +void
> > +netdev_dpdk_extbuf_replace(struct dp_packet *b, void *buf, uint32_t 
> > data_len)
> > +{
> > +    struct rte_mbuf *pkt = (struct rte_mbuf *) b;
> > +    struct rte_mbuf_ext_shared_info *shinfo;
> > +    uint16_t buf_len = data_len;
> > +
> > +    shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len,
> > +                                                netdev_dpdk_extbuf_free,
> > +                                                buf);
> > +    ovs_assert(shinfo != NULL);
> > +
> > +    if (RTE_MBUF_HAS_EXTBUF(pkt)) {
> > +        rte_pktmbuf_detach_extbuf(pkt);
> > +    }
> > +    rte_pktmbuf_attach_extbuf(pkt, buf, rte_malloc_virt2iova(buf), buf_len,
> > +                              shinfo);
> > +    /* OVS only supports mono segment.
> > +     * Packet size did not change, restore the current segment length. */
> > +    pkt->data_len = pkt->pkt_len;
> > +}
> > +
> >  static struct rte_mbuf *
> >  dpdk_pktmbuf_attach_extbuf(struct rte_mbuf *pkt, uint32_t data_len)
> >  {
> >      uint32_t total_len = RTE_PKTMBUF_HEADROOM + data_len;
> > -    struct rte_mbuf_ext_shared_info *shinfo = NULL;
> > +    struct rte_mbuf_ext_shared_info *shinfo;
> >      uint16_t buf_len;
> >      void *buf;
> >
> > -    total_len += sizeof *shinfo + sizeof(uintptr_t);
> > -    total_len = RTE_ALIGN_CEIL(total_len, sizeof(uintptr_t));
> > -
> > +    buf = netdev_dpdk_extbuf_allocate(&total_len);
> > +    if (OVS_UNLIKELY(buf == NULL)) {
> > +        VLOG_ERR("Failed to allocate memory using rte_malloc: %u", 
> > total_len);
> > +        return NULL;
> > +    }
> >      if (OVS_UNLIKELY(total_len > UINT16_MAX)) {
> > +        netdev_dpdk_extbuf_free(NULL, buf);
> >          VLOG_ERR("Can't copy packet: too big %u", total_len);
>
> You lose a bit of visibility now as it will be logged as "Failed to
> allocate memory using rte_malloc:" for a packet too big.
>
> You could take the rte_malloc out of netdev_dpdk_extbuf_allocate and
> just use the (renamed) fn to calculate the size ? or else change the
> logging to be something more generic. wdyt?

Mm, that should align well with the assert on the buffer length.
I'll try this for the next revision.


>
> >          return NULL;
> >      }
> >
> >      buf_len = total_len;
> > -    buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE);
> > -    if (OVS_UNLIKELY(buf == NULL)) {
> > -        VLOG_ERR("Failed to allocate memory using rte_malloc: %u", 
> > buf_len);
> > -        return NULL;
> > -    }
> > -
> > -    /* Initialize shinfo. */
> >      shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len,
> >                                                  netdev_dpdk_extbuf_free,
> >                                                  buf);
> >      if (OVS_UNLIKELY(shinfo == NULL)) {
> > -        rte_free(buf);
> > +        netdev_dpdk_extbuf_free(NULL, buf);
> >          VLOG_ERR("Failed to initialize shared info for mbuf while "
> >                   "attempting to attach an external buffer.");
> >          return NULL;
> > diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
> > index 86df7a1e83..274900c062 100644
> > --- a/lib/netdev-dpdk.h
> > +++ b/lib/netdev-dpdk.h
> > @@ -32,6 +32,9 @@ struct netdev;
> >  void netdev_dpdk_register(const struct smap *);
> >  void free_dpdk_buf(struct dp_packet *);
> >
> > +void *netdev_dpdk_extbuf_allocate(uint32_t *);
> > +void netdev_dpdk_extbuf_replace(struct dp_packet *, void *, uint32_t);
> > +
> >  bool netdev_dpdk_flow_api_supported(struct netdev *);
> >
> >  int
> > diff --git a/tests/system-dpdk.at b/tests/system-dpdk.at
> > index 393b76c76c..5c27f6fb72 100644
> > --- a/tests/system-dpdk.at
> > +++ b/tests/system-dpdk.at
> > @@ -941,3 +941,64 @@ AT_CHECK([ovs-vsctl del-port br10 p1], [], [stdout], 
> > [stderr])
> >  OVS_DPDK_STOP_VSWITCHD
> >  AT_CLEANUP
> >  dnl 
> > --------------------------------------------------------------------------
> > +
> > +
> > +
> > +dnl 
> > --------------------------------------------------------------------------
> > +dnl Test headroom expansion.
> > +AT_SETUP([OVS-DPDK - headroom expansion])
> > +AT_KEYWORDS([dpdk])
> > +OVS_DPDK_PRE_CHECK()
> > +OVS_DPDK_START([--no-pci])
> > +
> > +ADD_BR([br0])
> > +ADD_BR([br-underlay0])
> > +ADD_BR([br-underlay1])
> > +
> > +AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"])
> > +AT_CHECK([ovs-ofctl add-flow br-underlay0 "actions=normal"])
> > +AT_CHECK([ovs-ofctl add-flow br-underlay1 "actions=normal"])
> > +
> > +ADD_NAMESPACES(at_ns0)
> > +
> > +AT_CHECK([ip link set dev br-underlay1 up])
> > +AT_CHECK([ip link set dev br-underlay0 up])
> > +
> > +AT_CHECK([ip addr add dev br-underlay1 "fc00::100/64" nodad])
> > +AT_CHECK([ovs-vsctl add-port br-underlay1 p0 -- \
> > +          set interface p0 type=dpdk -- \
> > +          set interface p0 options:n_rxq=$(lscpu | awk '/NUMA node\(s\)/ { 
> > print $NF + 1 }') -- \
> > +          set interface p0 options:dpdk-devargs=net_tap0,iface=tap1])
> > +AT_CHECK([ip link set tap1 netns at_ns0])
> > +NS_CHECK_EXEC([at_ns0], [ip link set tap1 up])
> > +OVS_WAIT_UNTIL([ip -n at_ns0 link show dev tap1 | grep -qw LOWER_UP])
> > +NS_CHECK_EXEC([at_ns0], [ip -6 addr add "fc00::1/64" nodad dev tap1])
> > +
> > +ADD_OVS_TUNNEL6([vxlan], [br-underlay0], [at_vxlan2], [fc00::1],
> > +                ["fc00:1::100/64" nodad], [options:key=0])
> > +ADD_NATIVE_TUNNEL6([vxlan], [at_vxlan3], [at_ns0], [fc00::100],
> > +                   ["fc00:1::1/64" nodad], [id 0 dstport 4789])
> > +
> > +ADD_OVS_TUNNEL6([vxlan], [br0], [at_vxlan0], [fc00:1::1],
> > +                ["fc00:2::100/64" nodad], [options:key=1])
> > +ADD_NATIVE_TUNNEL6([vxlan], [at_vxlan1], [at_ns0], [fc00:1::100],
> > +                   ["fc00:2::1/64" nodad], [id 1 dstport 4789])
> > +
> > +AT_CHECK([ovs-vsctl add-port br0 p1 -- \
> > +          set interface p1 type=dpdk -- \
> > +          set interface p1 options:n_rxq=$(lscpu | awk '/NUMA node\(s\)/ { 
> > print $NF + 1 }') -- \
> > +          set interface p1 options:dpdk-devargs=net_tap1,iface=tap2])
> > +OVS_WAIT_UNTIL([ip link show dev tap2 | grep -qw LOWER_UP])
> > +AT_CHECK([ip -6 addr add "fc00:2::200/64" nodad dev tap2], [], [stdout], 
> > [stderr])
> > +
> > +OVS_WAIT_UNTIL([ip netns exec at_ns0 ping6 -c 1 fc00::100])
> > +OVS_WAIT_UNTIL([ip netns exec at_ns0 ping6 -c 1 fc00:1::100])
> > +OVS_WAIT_UNTIL([ip netns exec at_ns0 ping6 -c 1 fc00:2::100])
> > +NS_CHECK_EXEC([at_ns0], [ping6 -q -c 3 -i 0.3 -W 2 fc00:2::200 | 
> > FORMAT_PING], [0], [dnl
> > +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> > +])
> > +
> > +dnl Clean up
> > +OVS_DPDK_STOP_VSWITCHD(["/Failed to send start req to secondary 95/d"])
>
> fyi - the UT failed on my rhel system (below). It requires
> kernel-modules-extra to get sch_multiq - with that it passes. I didn't
> dig deeper, but maybe we could skip if sch_multiq is not present.
>
> |dpdk|ERR|TAP: tap_nl_dump_ext_ack(): Specified qdisc kind is unknown
> |dpdk|ERR|TAP: qdisc_create_multiq(): Could not add multiq qdisc (2): No
> such file or directory
> |dpdk|ERR|TAP: eth_dev_tap_create(): tap1: failed to create multiq qdisc.
> |dpdk|ERR|TAP: eth_dev_tap_create():  Disabling rte flow support: No
> such file or directory(2)

Ah yes, I remember Eelco and I got those (actually debug) logs in the past...
Those errors are false positives unless the application wants to use rte_flow.

I'll update the ignored logs for this test (and I posted a cleanup on
dpdk side).


-- 
David Marchand

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to