RE: [PATCH net-next v3 5/6] net: stmmac: Add support for XDP_TX action
>> +static int stmmac_xdp_xmit_back(struct stmmac_priv *priv, >> +struct xdp_buff *xdp) >> +{ >> +struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp); >> +int cpu = smp_processor_id(); >> +struct netdev_queue *nq; >> +int queue; >> +int res; >> + >> +if (unlikely(!xdpf)) >> +return -EFAULT; > >Can you return -EFAULT here? looks like the function is otherwise >returning positive STMMAC_XDP_* return codes/masks. Good catch. Thanks. It should return STMMAC_XDP_CONSUMED. > >> +queue = stmmac_xdp_get_tx_queue(priv, cpu); >> +nq = netdev_get_tx_queue(priv->dev, queue); >> + >> +__netif_tx_lock(nq, cpu); >> +/* Avoids TX time-out as we are sharing with slow path */ >> +nq->trans_start = jiffies; >> +res = stmmac_xdp_xmit_xdpf(priv, queue, xdpf); >> +if (res == STMMAC_XDP_TX) { >> +stmmac_flush_tx_descriptors(priv, queue); >> +stmmac_tx_timer_arm(priv, queue); > >Would it make sense to arm the timer and flush descriptors at the end >of the NAPI poll cycle? Instead of after every TX frame? Agree. The Tx clean timer function can be scheduled once at the end of the NAPI poll for better optimization. 
> >> +} >> +__netif_tx_unlock(nq); >> + >> +return res; >> +} > >> @@ -4365,16 +4538,26 @@ static int stmmac_rx(struct stmmac_priv *priv, >int limit, u32 queue) >> xdp.data_hard_start = page_address(buf->page); >> xdp_set_data_meta_invalid(&xdp); >> xdp.frame_sz = buf_sz; >> +xdp.rxq = &rx_q->xdp_rxq; >> >> +pre_len = xdp.data_end - xdp.data_hard_start - >> + buf->page_offset; >> skb = stmmac_xdp_run_prog(priv, &xdp); >> +/* Due xdp_adjust_tail: DMA sync for_device >> + * cover max len CPU touch >> + */ >> +sync_len = xdp.data_end - xdp.data_hard_start - >> + buf->page_offset; >> +sync_len = max(sync_len, pre_len); >> >> /* For Not XDP_PASS verdict */ >> if (IS_ERR(skb)) { >> unsigned int xdp_res = -PTR_ERR(skb); >> >> if (xdp_res & STMMAC_XDP_CONSUMED) { >> -page_pool_recycle_direct(rx_q->page_pool, >> - buf->page); >> +page_pool_put_page(rx_q->page_pool, >> + virt_to_head_page(xdp.data), >> + sync_len, true); > >IMHO the dma_sync_size logic is a little questionable, but it's not really >related to your patch, others are already doing the same thing, so it's >fine, I guess. Ok. We may leave it as it is now until a better/cleaner solution is found.
Re: [PATCH net-next v3 5/6] net: stmmac: Add support for XDP_TX action
On Wed, 31 Mar 2021 23:41:34 +0800 Ong Boon Leong wrote: > This patch adds support for XDP_TX action which enables XDP program to > transmit back received frames. > > This patch has been tested with the "xdp2" app located in samples/bpf > dir. The DUT receives burst traffic packet generated using pktgen script > 'pktgen_sample03_burst_single_flow.sh'. > > v3: Added 'nq->trans_start = jiffies' to avoid TX time-out as we are > sharing TX queue between slow path and XDP. Thanks to Jakub Kicinski > for pointing out. > > Signed-off-by: Ong Boon Leong > +static int stmmac_xdp_xmit_back(struct stmmac_priv *priv, > + struct xdp_buff *xdp) > +{ > + struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp); > + int cpu = smp_processor_id(); > + struct netdev_queue *nq; > + int queue; > + int res; > + > + if (unlikely(!xdpf)) > + return -EFAULT; Can you return -EFAULT here? looks like the function is otherwise returning positive STMMAC_XDP_* return codes/masks. > + queue = stmmac_xdp_get_tx_queue(priv, cpu); > + nq = netdev_get_tx_queue(priv->dev, queue); > + > + __netif_tx_lock(nq, cpu); > + /* Avoids TX time-out as we are sharing with slow path */ > + nq->trans_start = jiffies; > + res = stmmac_xdp_xmit_xdpf(priv, queue, xdpf); > + if (res == STMMAC_XDP_TX) { > + stmmac_flush_tx_descriptors(priv, queue); > + stmmac_tx_timer_arm(priv, queue); Would it make sense to arm the timer and flush descriptors at the end of the NAPI poll cycle? Instead of after every TX frame? 
> + } > + __netif_tx_unlock(nq); > + > + return res; > +} > @@ -4365,16 +4538,26 @@ static int stmmac_rx(struct stmmac_priv *priv, int > limit, u32 queue) > xdp.data_hard_start = page_address(buf->page); > xdp_set_data_meta_invalid(&xdp); > xdp.frame_sz = buf_sz; > + xdp.rxq = &rx_q->xdp_rxq; > > + pre_len = xdp.data_end - xdp.data_hard_start - > + buf->page_offset; > skb = stmmac_xdp_run_prog(priv, &xdp); > + /* Due xdp_adjust_tail: DMA sync for_device > + * cover max len CPU touch > + */ > + sync_len = xdp.data_end - xdp.data_hard_start - > + buf->page_offset; > + sync_len = max(sync_len, pre_len); > > /* For Not XDP_PASS verdict */ > if (IS_ERR(skb)) { > unsigned int xdp_res = -PTR_ERR(skb); > > if (xdp_res & STMMAC_XDP_CONSUMED) { > - page_pool_recycle_direct(rx_q->page_pool, > - buf->page); > + page_pool_put_page(rx_q->page_pool, > + virt_to_head_page(xdp.data), > + sync_len, true); IMHO the dma_sync_size logic is a little questionable, but it's not really related to your patch, others are already doing the same thing, so it's fine, I guess. > buf->page = NULL; > priv->dev->stats.rx_dropped++;
[PATCH net-next v3 5/6] net: stmmac: Add support for XDP_TX action
This patch adds support for XDP_TX action which enables XDP program to transmit back received frames. This patch has been tested with the "xdp2" app located in samples/bpf dir. The DUT receives burst traffic packet generated using pktgen script 'pktgen_sample03_burst_single_flow.sh'. v3: Added 'nq->trans_start = jiffies' to avoid TX time-out as we are sharing TX queue between slow path and XDP. Thanks to Jakub Kicinski for pointing out. Signed-off-by: Ong Boon Leong --- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 12 +- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 222 -- 2 files changed, 216 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index e72224c8fbac..a93e22a6be59 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -36,12 +36,18 @@ struct stmmac_resources { int tx_irq[MTL_MAX_TX_QUEUES]; }; +enum stmmac_txbuf_type { + STMMAC_TXBUF_T_SKB, + STMMAC_TXBUF_T_XDP_TX, +}; + struct stmmac_tx_info { dma_addr_t buf; bool map_as_page; unsigned len; bool last_segment; bool is_jumbo; + enum stmmac_txbuf_type buf_type; }; #define STMMAC_TBS_AVAIL BIT(0) @@ -57,7 +63,10 @@ struct stmmac_tx_queue { struct dma_extended_desc *dma_etx cacheline_aligned_in_smp; struct dma_edesc *dma_entx; struct dma_desc *dma_tx; - struct sk_buff **tx_skbuff; + union { + struct sk_buff **tx_skbuff; + struct xdp_frame **xdpf; + }; struct stmmac_tx_info *tx_skbuff_dma; unsigned int cur_tx; unsigned int dirty_tx; @@ -77,6 +86,7 @@ struct stmmac_rx_buffer { struct stmmac_rx_queue { u32 rx_count_frames; u32 queue_index; + struct xdp_rxq_info xdp_rxq; struct page_pool *page_pool; struct stmmac_rx_buffer *buf_pool; struct stmmac_priv *priv_data; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 0dad8ab93eb5..35e9a738d663 100644 --- 
a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -71,6 +71,7 @@ MODULE_PARM_DESC(phyaddr, "Physical device address"); #define STMMAC_XDP_PASS 0 #define STMMAC_XDP_CONSUMED BIT(0) +#define STMMAC_XDP_TX BIT(1) static int flow_ctrl = FLOW_AUTO; module_param(flow_ctrl, int, 0644); @@ -1442,7 +1443,8 @@ static void stmmac_free_tx_buffer(struct stmmac_priv *priv, u32 queue, int i) { struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; - if (tx_q->tx_skbuff_dma[i].buf) { + if (tx_q->tx_skbuff_dma[i].buf && + tx_q->tx_skbuff_dma[i].buf_type != STMMAC_TXBUF_T_XDP_TX) { if (tx_q->tx_skbuff_dma[i].map_as_page) dma_unmap_page(priv->device, tx_q->tx_skbuff_dma[i].buf, @@ -1455,12 +1457,20 @@ static void stmmac_free_tx_buffer(struct stmmac_priv *priv, u32 queue, int i) DMA_TO_DEVICE); } - if (tx_q->tx_skbuff[i]) { + if (tx_q->xdpf[i] && + tx_q->tx_skbuff_dma[i].buf_type == STMMAC_TXBUF_T_XDP_TX) { + xdp_return_frame(tx_q->xdpf[i]); + tx_q->xdpf[i] = NULL; + } + + if (tx_q->tx_skbuff[i] && + tx_q->tx_skbuff_dma[i].buf_type == STMMAC_TXBUF_T_SKB) { dev_kfree_skb_any(tx_q->tx_skbuff[i]); tx_q->tx_skbuff[i] = NULL; - tx_q->tx_skbuff_dma[i].buf = 0; - tx_q->tx_skbuff_dma[i].map_as_page = false; } + + tx_q->tx_skbuff_dma[i].buf = 0; + tx_q->tx_skbuff_dma[i].map_as_page = false; } /** @@ -1568,6 +1578,7 @@ static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags) for (queue = 0; queue < rx_count; queue++) { struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + int ret; netif_dbg(priv, probe, priv->dev, "(%s) dma_rx_phy=0x%08x\n", __func__, @@ -1575,6 +1586,14 @@ static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags) stmmac_clear_rx_descriptors(priv, queue); + WARN_ON(xdp_rxq_info_reg_mem_model(&rx_q->xdp_rxq, + MEM_TYPE_PAGE_POOL, + rx_q->page_pool)); + + netdev_info(priv->dev, + "Register MEM_TYPE_PAGE_POOL RxQ-%d\n", + rx_q->queue_index); + for (i = 0; i < priv->dma_rx_size; i++) { struct dma_desc *p; @@ 
-1775,6 +1794,9 @@ static void free_dma_rx_desc_resources(struct stmmac_priv *priv)