Hi Simon,

Thank you for the review, I am sending
out v2 to address these comments.


Regards
Dipayaan Roy


On Tue, Jul 22, 2025 at 12:19:29PM +0100, Simon Horman wrote:
> On Mon, Jul 21, 2025 at 03:14:17AM -0700, Dipayaan Roy wrote:
> > This patch enhances RX buffer handling in the mana driver by allocating
> > pages from a page pool and slicing them into MTU-sized fragments, rather
> > than dedicating a full page per packet. This approach is especially
> > beneficial on systems with 64KB page sizes.
> > 
> > Key improvements:
> > 
> > - Proper integration of page pool for RX buffer allocations.
> > - MTU-sized buffer slicing to improve memory utilization.
> > - Reduce overall per Rx queue memory footprint.
> > - Automatic fallback to full-page buffers when:
> >    * Jumbo frames are enabled (MTU > PAGE_SIZE / 2).
> >    * The XDP path is active, to avoid complexities with fragment reuse.
> > - Removal of redundant pre-allocated RX buffers used in scenarios like MTU
> >   changes, ensuring consistency in RX buffer allocation.
> > 
> > Testing on VMs with 64KB pages shows around 200% throughput improvement.
> > Memory efficiency is significantly improved due to reduced wastage in page
> > allocations. Example: We are now able to fit 35 Rx buffers in a single 64KB
> > page for MTU size of 1500, instead of 1 Rx buffer per page previously.
> > 
> > Tested:
> > 
> > - iperf3, iperf2, and nttcp benchmarks.
> > - Jumbo frames with MTU 9000.
> > - Native XDP programs (XDP_PASS, XDP_DROP, XDP_TX, XDP_REDIRECT) for
> >   testing the driver???s XDP path.
> > - Page leak detection (kmemleak).
> > - Driver load/unload, reboot, and stress scenarios.
> > 
> > Signed-off-by: Dipayaan Roy <[email protected]>
> 
> Hi,
> 
> Some minor feedback from my side.
> 
> > diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c 
> > b/drivers/net/ethernet/microsoft/mana/mana_en.c
> 
> ...
> 
> > -int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu, int 
> > num_queues)
> > -{
> > -   struct device *dev;
> > -   struct page *page;
> > -   dma_addr_t da;
> > -   int num_rxb;
> > -   void *va;
> > -   int i;
> > -
> > -   mana_get_rxbuf_cfg(new_mtu, &mpc->rxbpre_datasize,
> > -                      &mpc->rxbpre_alloc_size, &mpc->rxbpre_headroom);
> > -
> > -   dev = mpc->ac->gdma_dev->gdma_context->dev;
> > -
> > -   num_rxb = num_queues * mpc->rx_queue_size;
> > -
> > -   WARN(mpc->rxbufs_pre, "mana rxbufs_pre exists\n");
> > -   mpc->rxbufs_pre = kmalloc_array(num_rxb, sizeof(void *), GFP_KERNEL);
> > -   if (!mpc->rxbufs_pre)
> > -           goto error;
> >  
> > -   mpc->das_pre = kmalloc_array(num_rxb, sizeof(dma_addr_t), GFP_KERNEL);
> > -   if (!mpc->das_pre)
> > -           goto error;
> > -
> > -   mpc->rxbpre_total = 0;
> > -
> > -   for (i = 0; i < num_rxb; i++) {
> > -           page = dev_alloc_pages(get_order(mpc->rxbpre_alloc_size));
> > -           if (!page)
> > -                   goto error;
> > -
> > -           va = page_to_virt(page);
> > -
> > -           da = dma_map_single(dev, va + mpc->rxbpre_headroom,
> > -                               mpc->rxbpre_datasize, DMA_FROM_DEVICE);
> > -           if (dma_mapping_error(dev, da)) {
> > -                   put_page(page);
> > -                   goto error;
> > +   /* For xdp and jumbo frames make sure only one packet fits per page */
> > +   if (((mtu + MANA_RXBUF_PAD) > PAGE_SIZE / 2) || 
> > rcu_access_pointer(apc->bpf_prog)) {
> 
> The line above seems to have unnecessary parentheses.
> And should be line wrapped to be 80 columns wide or less,
> as is still preferred by Networking code.
> The latter condition is flagged by checkpatch.pl --max-line-length=80
> 
>       if (mtu + MANA_RXBUF_PAD > PAGE_SIZE / 2 ||
>           rcu_access_pointer(apc->bpf_prog)) {
> 
> (The above is completely untested)
> 
> Also, I am a little confused by the use of rcu_access_pointer()
> above and below, as bpf_prog does not seem to be managed by RCU.
> 
> Flagged by Sparse.
> 
> > +           if (rcu_access_pointer(apc->bpf_prog)) {
> > +                   *headroom = XDP_PACKET_HEADROOM;
> > +                   *alloc_size = PAGE_SIZE;
> > +           } else {
> > +                   *headroom = 0; /* no support for XDP */
> > +                   *alloc_size = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + 
> > *headroom);
> >             }
> >  
> > -           mpc->rxbufs_pre[i] = va;
> > -           mpc->das_pre[i] = da;
> > -           mpc->rxbpre_total = i + 1;
> > +           *frag_count = 1;
> > +           return;
> >     }
> >  
> > -   return 0;
> > +   /* Standard MTU case - optimize for multiple packets per page */
> > +   *headroom = 0;
> >  
> > -error:
> > -   netdev_err(mpc->ndev, "Failed to pre-allocate RX buffers for %d 
> > queues\n", num_queues);
> > -   mana_pre_dealloc_rxbufs(mpc);
> > -   return -ENOMEM;
> > +   /* Calculate base buffer size needed */
> > +   u32 len = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + *headroom);
> > +   u32 buf_size = ALIGN(len, MANA_RX_FRAG_ALIGNMENT);
> > +
> > +   /* Calculate how many packets can fit in a page */
> > +   *frag_count = PAGE_SIZE / buf_size;
> > +   *alloc_size = buf_size;
> >  }
> 
> ...
> 
> > -static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev,
> > -                        dma_addr_t *da, bool *from_pool)
> > +static void *mana_get_rxfrag(struct mana_rxq *rxq,
> > +                        struct device *dev, dma_addr_t *da, bool 
> > *from_pool)
> >  {
> >     struct page *page;
> > +   u32 offset;
> >     void *va;
> > -
> >     *from_pool = false;
> >  
> > -   /* Reuse XDP dropped page if available */
> > -   if (rxq->xdp_save_va) {
> > -           va = rxq->xdp_save_va;
> > -           rxq->xdp_save_va = NULL;
> > -   } else {
> > -           page = page_pool_dev_alloc_pages(rxq->page_pool);
> > -           if (!page)
> > +   /* Don't use fragments for jumbo frames or XDP (i.e when fragment = 1 
> > per page) */
> > +   if (rxq->frag_count == 1) {
> > +           /* Reuse XDP dropped page if available */
> > +           if (rxq->xdp_save_va) {
> > +                   va = rxq->xdp_save_va;
> > +                   rxq->xdp_save_va = NULL;
> > +           } else {
> > +                   page = page_pool_dev_alloc_pages(rxq->page_pool);
> > +                   if (!page)
> > +                           return NULL;
> > +
> > +                   *from_pool = true;
> > +                   va = page_to_virt(page);
> > +           }
> > +
> > +           *da = dma_map_single(dev, va + rxq->headroom, rxq->datasize,
> > +                                DMA_FROM_DEVICE);
> > +           if (dma_mapping_error(dev, *da)) {
> > +                   if (*from_pool)
> > +                           page_pool_put_full_page(rxq->page_pool, page, 
> > false);
> > +                   else
> > +                           put_page(virt_to_head_page(va));
> 
> The put logic above seems to appear in this patch
> more than once. IMHO a helper would be nice.
> 
> > +
> >                     return NULL;
> > +           }
> >  
> > -           *from_pool = true;
> > -           va = page_to_virt(page);
> > +           return va;
> >     }
> 
> ...
> 
> -- 
> pw-bot: changes-requested

Reply via email to