From: Ilias Apalodimas <ilias.apalodi...@linaro.org>

This patch changes struct sk_buff, and is thus by definition
controversial.

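Place a new member 'mem_info' of type struct xdp_mem_info just after
the flag members head_frag and pfmemalloc, and not in between
headers_start/end, so that it is not carried over by the memcpy() in
__copy_skb_header(). This ensures skb_copy() and pskb_copy(), which
allocate a fresh head for the copy, work as-is.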
Copying mem_info during skb_clone() is required.  This makes sure that
pages are correctly freed or recycled during the altered
skb_free_head() invocation.

The 'mem_info' name is chosen as this is not strictly tied to XDP,
although the XDP return infrastructure is used.  As a future plan, we
could introduce a __u8 flags member to xdp_mem_info and move flags
head_frag and pfmemalloc into this area.

Signed-off-by: Ilias Apalodimas <ilias.apalodi...@linaro.org>
Signed-off-by: Jesper Dangaard Brouer <bro...@redhat.com>
---
 include/linux/skbuff.h |    6 +++++-
 include/net/xdp.h      |    1 +
 net/core/skbuff.c      |    7 +++++++
 net/core/xdp.c         |    6 ++++++
 4 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7dcfb5591dc3..95dac0ba6947 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -40,6 +40,7 @@
 #include <linux/in6.h>
 #include <linux/if_packet.h>
 #include <net/flow.h>
+#include <net/xdp.h>
 
 /* The interface for checksum offload between the stack and networking drivers
  * is as follows...
@@ -744,6 +745,10 @@ struct sk_buff {
                                head_frag:1,
                                xmit_more:1,
                                pfmemalloc:1;
+       /* TODO: Future idea, extend mem_info with __u8 flags, and
+        * move bits head_frag and pfmemalloc there.
+        */
+       struct xdp_mem_info     mem_info;
 
        /* fields enclosed in headers_start/headers_end are copied
         * using a single memcpy() in __copy_skb_header()
@@ -827,7 +832,6 @@ struct sk_buff {
 #ifdef CONFIG_NETWORK_SECMARK
        __u32           secmark;
 #endif
-
        union {
                __u32           mark;
                __u32           reserved_tailroom;
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 5c33b9e0efab..4a0ca7a3d5e5 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -128,6 +128,7 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
 void xdp_return_frame(struct xdp_frame *xdpf);
 void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
 void xdp_return_buff(struct xdp_buff *xdp);
+void xdp_return_skb_page(void *data, struct xdp_mem_info *mem_info);
 
 int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
                     struct net_device *dev, u32 queue_index);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b4ee5c8b928f..71aca186e44c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -70,6 +70,7 @@
 #include <net/checksum.h>
 #include <net/ip6_checksum.h>
 #include <net/xfrm.h>
+#include <net/page_pool.h>
 
 #include <linux/uaccess.h>
 #include <trace/events/skb.h>
@@ -544,6 +545,11 @@ static void skb_free_head(struct sk_buff *skb)
 {
        unsigned char *head = skb->head;
 
+       if (skb->mem_info.type == MEM_TYPE_PAGE_POOL) {
+               xdp_return_skb_page(head, &skb->mem_info);
+               return;
+       }
+
        if (skb->head_frag)
                skb_free_frag(head);
        else
@@ -859,6 +865,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
        n->nohdr = 0;
        n->peeked = 0;
        C(pfmemalloc);
+       C(mem_info);
        n->destructor = NULL;
        C(tail);
        C(end);
diff --git a/net/core/xdp.c b/net/core/xdp.c
index e79526314864..1703be4c2611 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -379,6 +379,12 @@ void xdp_return_buff(struct xdp_buff *xdp)
 }
 EXPORT_SYMBOL_GPL(xdp_return_buff);
 
+void xdp_return_skb_page(void *data, struct xdp_mem_info *mem_info)
+{
+       __xdp_return(data, mem_info, false, 0);
+}
+EXPORT_SYMBOL(xdp_return_skb_page);
+
 int xdp_attachment_query(struct xdp_attachment_info *info,
                         struct netdev_bpf *bpf)
 {

Reply via email to