The generic xdp_frame format was inspired by cpumap's own internal
xdp_pkt format.  It is now time to convert cpumap over to the generic
xdp_frame format.  The cpumap needs one extra field dev_rx.

Signed-off-by: Jesper Dangaard Brouer <bro...@redhat.com>
---
 include/net/xdp.h   |    1 +
 kernel/bpf/cpumap.c |  100 ++++++++++++++-------------------------------------
 2 files changed, 29 insertions(+), 72 deletions(-)

diff --git a/include/net/xdp.h b/include/net/xdp.h
index 756c42811e78..ea3773f94f65 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -67,6 +67,7 @@ struct xdp_frame {
         * while mem info is valid on remote CPU.
         */
        struct xdp_mem_info mem;
+       struct net_device *dev_rx; /* used by cpumap */
 };
 
 /* Convert xdp_buff to xdp_frame */
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 3e4bbcbe3e86..bcdc4dea5ce7 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -159,52 +159,8 @@ static void cpu_map_kthread_stop(struct work_struct *work)
        kthread_stop(rcpu->kthread);
 }
 
-/* For now, xdp_pkt is a cpumap internal data structure, with info
- * carried between enqueue to dequeue. It is mapped into the top
- * headroom of the packet, to avoid allocating separate mem.
- */
-struct xdp_pkt {
-       void *data;
-       u16 len;
-       u16 headroom;
-       u16 metasize;
-       /* Lifetime of xdp_rxq_info is limited to NAPI/enqueue time,
-        * while mem info is valid on remote CPU.
-        */
-       struct xdp_mem_info mem;
-       struct net_device *dev_rx;
-};
-
-/* Convert xdp_buff to xdp_pkt */
-static struct xdp_pkt *convert_to_xdp_pkt(struct xdp_buff *xdp)
-{
-       struct xdp_pkt *xdp_pkt;
-       int metasize;
-       int headroom;
-
-       /* Assure headroom is available for storing info */
-       headroom = xdp->data - xdp->data_hard_start;
-       metasize = xdp->data - xdp->data_meta;
-       metasize = metasize > 0 ? metasize : 0;
-       if (unlikely((headroom - metasize) < sizeof(*xdp_pkt)))
-               return NULL;
-
-       /* Store info in top of packet */
-       xdp_pkt = xdp->data_hard_start;
-
-       xdp_pkt->data = xdp->data;
-       xdp_pkt->len  = xdp->data_end - xdp->data;
-       xdp_pkt->headroom = headroom - sizeof(*xdp_pkt);
-       xdp_pkt->metasize = metasize;
-
-       /* rxq only valid until napi_schedule ends, convert to xdp_mem_info */
-       xdp_pkt->mem = xdp->rxq->mem;
-
-       return xdp_pkt;
-}
-
 static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
-                                        struct xdp_pkt *xdp_pkt)
+                                        struct xdp_frame *xdpf)
 {
        unsigned int frame_size;
        void *pkt_data_start;
@@ -219,7 +175,7 @@ static struct sk_buff *cpu_map_build_skb(struct 
bpf_cpu_map_entry *rcpu,
         * would be preferred to set frame_size to 2048 or 4096
         * depending on the driver.
         *   frame_size = 2048;
-        *   frame_len  = frame_size - sizeof(*xdp_pkt);
+        *   frame_len  = frame_size - sizeof(*xdp_frame);
         *
         * Instead, with info avail, skb_shared_info in placed after
         * packet len.  This, unfortunately fakes the truesize.
@@ -227,21 +183,21 @@ static struct sk_buff *cpu_map_build_skb(struct 
bpf_cpu_map_entry *rcpu,
         * is not at a fixed memory location, with mixed length
         * packets, which is bad for cache-line hotness.
         */
-       frame_size = SKB_DATA_ALIGN(xdp_pkt->len) + xdp_pkt->headroom +
+       frame_size = SKB_DATA_ALIGN(xdpf->len) + xdpf->headroom +
                SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
-       pkt_data_start = xdp_pkt->data - xdp_pkt->headroom;
+       pkt_data_start = xdpf->data - xdpf->headroom;
        skb = build_skb(pkt_data_start, frame_size);
        if (!skb)
                return NULL;
 
-       skb_reserve(skb, xdp_pkt->headroom);
-       __skb_put(skb, xdp_pkt->len);
-       if (xdp_pkt->metasize)
-               skb_metadata_set(skb, xdp_pkt->metasize);
+       skb_reserve(skb, xdpf->headroom);
+       __skb_put(skb, xdpf->len);
+       if (xdpf->metasize)
+               skb_metadata_set(skb, xdpf->metasize);
 
        /* Essential SKB info: protocol and skb->dev */
-       skb->protocol = eth_type_trans(skb, xdp_pkt->dev_rx);
+       skb->protocol = eth_type_trans(skb, xdpf->dev_rx);
 
        /* Optional SKB info, currently missing:
         * - HW checksum info           (skb->ip_summed)
@@ -259,11 +215,11 @@ static void __cpu_map_ring_cleanup(struct ptr_ring *ring)
         * invoked cpu_map_kthread_stop(). Catch any broken behaviour
         * gracefully and warn once.
         */
-       struct xdp_pkt *xdp_pkt;
+       struct xdp_frame *xdpf;
 
-       while ((xdp_pkt = ptr_ring_consume(ring)))
-               if (WARN_ON_ONCE(xdp_pkt))
-                       xdp_return_frame(xdp_pkt, &xdp_pkt->mem);
+       while ((xdpf = ptr_ring_consume(ring)))
+               if (WARN_ON_ONCE(xdpf))
+                       xdp_return_frame(xdpf->data, &xdpf->mem);
 }
 
 static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
@@ -290,7 +246,7 @@ static int cpu_map_kthread_run(void *data)
         */
        while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
                unsigned int processed = 0, drops = 0, sched = 0;
-               struct xdp_pkt *xdp_pkt;
+               struct xdp_frame *xdpf;
 
                /* Release CPU reschedule checks */
                if (__ptr_ring_empty(rcpu->queue)) {
@@ -313,13 +269,13 @@ static int cpu_map_kthread_run(void *data)
                 * kthread CPU pinned. Lockless access to ptr_ring
                 * consume side valid as no-resize allowed of queue.
                 */
-               while ((xdp_pkt = __ptr_ring_consume(rcpu->queue))) {
+               while ((xdpf = __ptr_ring_consume(rcpu->queue))) {
                        struct sk_buff *skb;
                        int ret;
 
-                       skb = cpu_map_build_skb(rcpu, xdp_pkt);
+                       skb = cpu_map_build_skb(rcpu, xdpf);
                        if (!skb) {
-                               xdp_return_frame(xdp_pkt, &xdp_pkt->mem);
+                               xdp_return_frame(xdpf->data, &xdpf->mem);
                                continue;
                        }
 
@@ -616,13 +572,13 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry 
*rcpu,
        spin_lock(&q->producer_lock);
 
        for (i = 0; i < bq->count; i++) {
-               struct xdp_pkt *xdp_pkt = bq->q[i];
+               struct xdp_frame *xdpf = bq->q[i];
                int err;
 
-               err = __ptr_ring_produce(q, xdp_pkt);
+               err = __ptr_ring_produce(q, xdpf);
                if (err) {
                        drops++;
-                       xdp_return_frame(xdp_pkt->data, &xdp_pkt->mem);
+                       xdp_return_frame(xdpf->data, &xdpf->mem);
                }
                processed++;
        }
@@ -637,7 +593,7 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
 /* Runs under RCU-read-side, plus in softirq under NAPI protection.
  * Thus, safe percpu variable access.
  */
-static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_pkt *xdp_pkt)
+static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
 {
        struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
 
@@ -648,28 +604,28 @@ static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, 
struct xdp_pkt *xdp_pkt)
         * driver to code invoking us to finished, due to driver
         * (e.g. ixgbe) recycle tricks based on page-refcnt.
         *
-        * Thus, incoming xdp_pkt is always queued here (else we race
+        * Thus, incoming xdp_frame is always queued here (else we race
         * with another CPU on page-refcnt and remaining driver code).
         * Queue time is very short, as driver will invoke flush
         * operation, when completing napi->poll call.
         */
-       bq->q[bq->count++] = xdp_pkt;
+       bq->q[bq->count++] = xdpf;
        return 0;
 }
 
 int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
                    struct net_device *dev_rx)
 {
-       struct xdp_pkt *xdp_pkt;
+       struct xdp_frame *xdpf;
 
-       xdp_pkt = convert_to_xdp_pkt(xdp);
-       if (unlikely(!xdp_pkt))
+       xdpf = convert_to_xdp_frame(xdp);
+       if (unlikely(!xdpf))
                return -EOVERFLOW;
 
        /* Info needed when constructing SKB on remote CPU */
-       xdp_pkt->dev_rx = dev_rx;
+       xdpf->dev_rx = dev_rx;
 
-       bq_enqueue(rcpu, xdp_pkt);
+       bq_enqueue(rcpu, xdpf);
        return 0;
 }
 

Reply via email to