xdp_build_skb_from_zc() allocated xdp->frame_sz bytes from the per-cpu
system_page_pool and built the skb head with napi_build_skb(). The
latter places skb_shared_info at the tail of the buffer, but the
helper sized the allocation as if the whole frame_sz were usable for
data. Whenever the packet plus reserved headroom approached frame_sz,
the head memcpy overran shinfo with packet content, corrupting
->flags (SKBFL_ZEROCOPY_ENABLE) and ->nr_frags, which then drove
skb_copy_ubufs() off the end of frags[] on the RX path:

  UBSAN: array-index-out-of-bounds in include/linux/skbuff.h:2541
  index 113 is out of range for type 'skb_frag_t [17]'
   skb_copy_ubufs+0x7da/0x960
   ip_local_deliver_finish+0xcd/0x110
   ice_napi_poll+0xe4/0x2a0 [ice]

The overrun bytes come from the packet, so an on-wire sender can
corrupt kernel memory remotely whenever the XDP program returns
XDP_PASS.

Rather than patch the sizing math, switch to the pattern used by other
in-tree AF_XDP zero-copy drivers like mlx5 and i40e which use
napi_alloc_skb() sized to the actual packet plus skb_put_data().
This sizes the head exactly for the data being copied, drops the
system_page_pool local_lock from this path, and removes the
structural mismatch between frame_sz and the skb head buffer. Frags
are allocated with alloc_page() per frag, matching the other drivers.

Fixes: 560d958c6c68 ("xsk: add generic XSk &xdp_buff -> skb conversion")
Cc: [email protected]
Signed-off-by: Lorenz Brun <[email protected]>
---
 drivers/net/ethernet/intel/ice/ice_xsk.c |  2 +-
 include/net/libeth/xsk.h                 |  2 +-
 include/net/xdp.h                        |  3 +-
 net/core/xdp.c                           | 72 ++++++++----------------
 4 files changed, 29 insertions(+), 50 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c 
b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 0643017541c35..6c01a14fde150 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -653,7 +653,7 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring,
 
 construct_skb:
                /* XDP_PASS path */
-               skb = xdp_build_skb_from_zc(first);
+               skb = xdp_build_skb_from_zc(&rx_ring->q_vector->napi, first);
                if (!skb) {
                        xsk_buff_free(first);
                        first = NULL;
diff --git a/include/net/libeth/xsk.h b/include/net/libeth/xsk.h
index 82b5d21aae878..922b4587acd3f 100644
--- a/include/net/libeth/xsk.h
+++ b/include/net/libeth/xsk.h
@@ -468,7 +468,7 @@ __libeth_xsk_run_pass(struct libeth_xdp_buff *xdp,
        if (act != LIBETH_XDP_PASS)
                return act != LIBETH_XDP_ABORTED;
 
-       skb = xdp_build_skb_from_zc(&xdp->base);
+       skb = xdp_build_skb_from_zc(napi, &xdp->base);
        if (unlikely(!skb)) {
                libeth_xsk_buff_free_slow(xdp);
                return true;
diff --git a/include/net/xdp.h b/include/net/xdp.h
index aa742f413c358..fb2452243fd36 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -375,7 +375,8 @@ void xdp_warn(const char *msg, const char *func, const int 
line);
 #define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__)
 
 struct sk_buff *xdp_build_skb_from_buff(const struct xdp_buff *xdp);
-struct sk_buff *xdp_build_skb_from_zc(struct xdp_buff *xdp);
+struct sk_buff *xdp_build_skb_from_zc(struct napi_struct *napi,
+                                     struct xdp_buff *xdp);
 struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp);
 struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
                                           struct sk_buff *skb,
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 9890a30584ba7..54005b64e6cbb 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -677,16 +677,14 @@ EXPORT_SYMBOL_GPL(xdp_build_skb_from_buff);
  * xdp_copy_frags_from_zc - copy frags from XSk buff to skb
  * @skb: skb to copy frags to
  * @xdp: XSk &xdp_buff from which the frags will be copied
- * @pp: &page_pool backing page allocation, if available
  *
  * Copy all frags from XSk &xdp_buff to the skb to pass it up the stack.
- * Allocate a new buffer for each frag, copy it and attach to the skb.
+ * Allocate a new page for each frag, copy it and attach to the skb.
  *
- * Return: true on success, false on netmem allocation fail.
+ * Return: true on success, false on page allocation fail.
  */
 static noinline bool xdp_copy_frags_from_zc(struct sk_buff *skb,
-                                           const struct xdp_buff *xdp,
-                                           struct page_pool *pp)
+                                           const struct xdp_buff *xdp)
 {
        struct skb_shared_info *sinfo = skb_shinfo(skb);
        const struct skb_shared_info *xinfo;
@@ -699,20 +697,18 @@ static noinline bool xdp_copy_frags_from_zc(struct 
sk_buff *skb,
        for (u32 i = 0; i < nr_frags; i++) {
                const skb_frag_t *frag = &xinfo->frags[i];
                u32 len = skb_frag_size(frag);
-               u32 offset, truesize = len;
                struct page *page;
 
-               page = page_pool_dev_alloc(pp, &offset, &truesize);
+               page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
                if (unlikely(!page)) {
                        sinfo->nr_frags = i;
                        return false;
                }
 
-               memcpy(page_address(page) + offset, skb_frag_address(frag),
-                      LARGEST_ALIGN(len));
-               __skb_fill_page_desc_noacc(sinfo, i, page, offset, len);
+               memcpy(page_address(page), skb_frag_address(frag), len);
+               __skb_fill_page_desc_noacc(sinfo, i, page, 0, len);
 
-               tsize += truesize;
+               tsize += PAGE_SIZE;
                if (page_is_pfmemalloc(page))
                        flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC;
        }
@@ -725,49 +721,34 @@ static noinline bool xdp_copy_frags_from_zc(struct 
sk_buff *skb,
 
 /**
  * xdp_build_skb_from_zc - create an skb from XSk &xdp_buff
+ * @napi: NAPI instance the buffer was received on (provides the skb cache)
  * @xdp: source XSk buff
  *
  * Similar to xdp_build_skb_from_buff(), but for XSk frames. Allocate an skb
- * head, new buffer for the head, copy the data and initialize the skb fields.
- * If there are frags, allocate new buffers for them and copy.
- * Buffers are allocated from the system percpu pools to try recycling them.
- * If new skb was built successfully, @xdp is returned to XSk pool's freelist.
- * On error, it remains untouched and the caller must take care of this.
+ * sized to the packet from the NAPI cache, copy the head data, and copy
+ * any frags into freshly allocated pages.
+ *
+ * If a new skb was built successfully, @xdp is returned to the XSk pool's
+ * freelist. On error, it remains untouched and the caller must take care
+ * of this.
  *
  * Return: new &sk_buff on success, %NULL on error.
  */
-struct sk_buff *xdp_build_skb_from_zc(struct xdp_buff *xdp)
+struct sk_buff *xdp_build_skb_from_zc(struct napi_struct *napi,
+                                     struct xdp_buff *xdp)
 {
        const struct xdp_rxq_info *rxq = xdp->rxq;
-       u32 len = xdp->data_end - xdp->data_meta;
-       u32 truesize = xdp->frame_sz;
-       struct sk_buff *skb = NULL;
-       struct page_pool *pp;
-       int metalen;
-       void *data;
+       u32 totallen = xdp->data_end - xdp->data_meta;
+       u32 metalen = xdp->data - xdp->data_meta;
+       struct sk_buff *skb;
 
-       if (!IS_ENABLED(CONFIG_PAGE_POOL))
+       skb = napi_alloc_skb(napi, totallen);
+       if (unlikely(!skb))
                return NULL;
 
-       local_lock_nested_bh(&system_page_pool.bh_lock);
-       pp = this_cpu_read(system_page_pool.pool);
-       data = page_pool_dev_alloc_va(pp, &truesize);
-       if (unlikely(!data))
-               goto out;
-
-       skb = napi_build_skb(data, truesize);
-       if (unlikely(!skb)) {
-               page_pool_free_va(pp, data, true);
-               goto out;
-       }
-
-       skb_mark_for_recycle(skb);
-       skb_reserve(skb, xdp->data_meta - xdp->data_hard_start);
+       skb_put_data(skb, xdp->data_meta, totallen);
 
-       memcpy(__skb_put(skb, len), xdp->data_meta, LARGEST_ALIGN(len));
-
-       metalen = xdp->data - xdp->data_meta;
-       if (metalen > 0) {
+       if (metalen) {
                skb_metadata_set(skb, metalen);
                __skb_pull(skb, metalen);
        }
@@ -775,18 +756,15 @@ struct sk_buff *xdp_build_skb_from_zc(struct xdp_buff 
*xdp)
        skb_record_rx_queue(skb, rxq->queue_index);
 
        if (unlikely(xdp_buff_has_frags(xdp)) &&
-           unlikely(!xdp_copy_frags_from_zc(skb, xdp, pp))) {
+           unlikely(!xdp_copy_frags_from_zc(skb, xdp))) {
                napi_consume_skb(skb, true);
-               skb = NULL;
-               goto out;
+               return NULL;
        }
 
        xsk_buff_free(xdp);
 
        skb->protocol = eth_type_trans(skb, rxq->dev);
 
-out:
-       local_unlock_nested_bh(&system_page_pool.bh_lock);
        return skb;
 }
 EXPORT_SYMBOL_GPL(xdp_build_skb_from_zc);
-- 
2.51.2

Reply via email to