RE: [PATCH net-next v3 5/6] net: stmmac: Add support for XDP_TX action

2021-03-31 Thread Ong, Boon Leong
>> +static int stmmac_xdp_xmit_back(struct stmmac_priv *priv,
>> +struct xdp_buff *xdp)
>> +{
>> +struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
>> +int cpu = smp_processor_id();
>> +struct netdev_queue *nq;
>> +int queue;
>> +int res;
>> +
>> +if (unlikely(!xdpf))
>> +return -EFAULT;
>
>Can you return -EFAULT here? looks like the function is otherwise
>returning positive STMMAC_XDP_* return codes/masks.

Good catch. Thanks. It should return STMMAC_XDP_CONSUMED. 

>
>> +queue = stmmac_xdp_get_tx_queue(priv, cpu);
>> +nq = netdev_get_tx_queue(priv->dev, queue);
>> +
>> +__netif_tx_lock(nq, cpu);
>> +/* Avoids TX time-out as we are sharing with slow path */
>> +nq->trans_start = jiffies;
>> +res = stmmac_xdp_xmit_xdpf(priv, queue, xdpf);
>> +if (res == STMMAC_XDP_TX) {
>> +stmmac_flush_tx_descriptors(priv, queue);
>> +stmmac_tx_timer_arm(priv, queue);
>
>Would it make sense to arm the timer and flush descriptors at the end
>of the NAPI poll cycle? Instead of after every TX frame?
Agree. The Tx clean timer function can be scheduled once at the end of
the NAPI poll for better optimization. 


>
>> +}
>> +__netif_tx_unlock(nq);
>> +
>> +return res;
>> +}
>
>> @@ -4365,16 +4538,26 @@ static int stmmac_rx(struct stmmac_priv *priv,
>int limit, u32 queue)
>>  xdp.data_hard_start = page_address(buf->page);
>>  xdp_set_data_meta_invalid(&xdp);
>>  xdp.frame_sz = buf_sz;
>> +xdp.rxq = &rx_q->xdp_rxq;
>>
>> +pre_len = xdp.data_end - xdp.data_hard_start -
>> +  buf->page_offset;
>>  skb = stmmac_xdp_run_prog(priv, &xdp);
>> +/* Due xdp_adjust_tail: DMA sync for_device
>> + * cover max len CPU touch
>> + */
>> +sync_len = xdp.data_end - xdp.data_hard_start -
>> +   buf->page_offset;
>> +sync_len = max(sync_len, pre_len);
>>
>>  /* For Not XDP_PASS verdict */
>>  if (IS_ERR(skb)) {
>>  unsigned int xdp_res = -PTR_ERR(skb);
>>
>>  if (xdp_res & STMMAC_XDP_CONSUMED) {
>> -page_pool_recycle_direct(rx_q->page_pool,
>> - buf->page);
>> +page_pool_put_page(rx_q->page_pool,
>> +virt_to_head_page(xdp.data),
>> +   sync_len, true);
>
>IMHO the dma_sync_size logic is a little questionable, but it's not really
>related to your patch, others are already doing the same thing, so it's
>fine, I guess.
Ok. We may leave it as it is now until a better/cleaner solution is found.

 


Re: [PATCH net-next v3 5/6] net: stmmac: Add support for XDP_TX action

2021-03-31 Thread Jakub Kicinski
On Wed, 31 Mar 2021 23:41:34 +0800 Ong Boon Leong wrote:
> This patch adds support for XDP_TX action which enables XDP program to
> transmit back received frames.
> 
> This patch has been tested with the "xdp2" app located in samples/bpf
> dir. The DUT receives burst traffic packet generated using pktgen script
> 'pktgen_sample03_burst_single_flow.sh'.
> 
> v3: Added 'nq->trans_start = jiffies' to avoid TX time-out as we are
> sharing TX queue between slow path and XDP. Thanks to Jakub Kicinski
> for pointing out.
> 
> Signed-off-by: Ong Boon Leong 

> +static int stmmac_xdp_xmit_back(struct stmmac_priv *priv,
> + struct xdp_buff *xdp)
> +{
> + struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
> + int cpu = smp_processor_id();
> + struct netdev_queue *nq;
> + int queue;
> + int res;
> +
> + if (unlikely(!xdpf))
> + return -EFAULT;

Can you return -EFAULT here? looks like the function is otherwise
returning positive STMMAC_XDP_* return codes/masks.

> + queue = stmmac_xdp_get_tx_queue(priv, cpu);
> + nq = netdev_get_tx_queue(priv->dev, queue);
> +
> + __netif_tx_lock(nq, cpu);
> + /* Avoids TX time-out as we are sharing with slow path */
> + nq->trans_start = jiffies;
> + res = stmmac_xdp_xmit_xdpf(priv, queue, xdpf);
> + if (res == STMMAC_XDP_TX) {
> + stmmac_flush_tx_descriptors(priv, queue);
> + stmmac_tx_timer_arm(priv, queue);

Would it make sense to arm the timer and flush descriptors at the end
of the NAPI poll cycle? Instead of after every TX frame?

> + }
> + __netif_tx_unlock(nq);
> +
> + return res;
> +}

> @@ -4365,16 +4538,26 @@ static int stmmac_rx(struct stmmac_priv *priv, int 
> limit, u32 queue)
>   xdp.data_hard_start = page_address(buf->page);
>   xdp_set_data_meta_invalid(&xdp);
>   xdp.frame_sz = buf_sz;
> + xdp.rxq = &rx_q->xdp_rxq;
>  
> + pre_len = xdp.data_end - xdp.data_hard_start -
> +   buf->page_offset;
>   skb = stmmac_xdp_run_prog(priv, &xdp);
> + /* Due xdp_adjust_tail: DMA sync for_device
> +  * cover max len CPU touch
> +  */
> + sync_len = xdp.data_end - xdp.data_hard_start -
> +buf->page_offset;
> + sync_len = max(sync_len, pre_len);
>  
>   /* For Not XDP_PASS verdict */
>   if (IS_ERR(skb)) {
>   unsigned int xdp_res = -PTR_ERR(skb);
>  
>   if (xdp_res & STMMAC_XDP_CONSUMED) {
> - page_pool_recycle_direct(rx_q->page_pool,
> -  buf->page);
> + page_pool_put_page(rx_q->page_pool,
> + virt_to_head_page(xdp.data),
> +sync_len, true);

IMHO the dma_sync_size logic is a little questionable, but it's not really
related to your patch, others are already doing the same thing, so it's
fine, I guess.

>   buf->page = NULL;
>   priv->dev->stats.rx_dropped++;



[PATCH net-next v3 5/6] net: stmmac: Add support for XDP_TX action

2021-03-31 Thread Ong Boon Leong
This patch adds support for XDP_TX action which enables XDP program to
transmit back received frames.

This patch has been tested with the "xdp2" app located in samples/bpf
dir. The DUT receives burst traffic packet generated using pktgen script
'pktgen_sample03_burst_single_flow.sh'.

v3: Added 'nq->trans_start = jiffies' to avoid TX time-out as we are
sharing TX queue between slow path and XDP. Thanks to Jakub Kicinski
for pointing out.

Signed-off-by: Ong Boon Leong 
---
 drivers/net/ethernet/stmicro/stmmac/stmmac.h  |  12 +-
 .../net/ethernet/stmicro/stmmac/stmmac_main.c | 222 --
 2 files changed, 216 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h 
b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index e72224c8fbac..a93e22a6be59 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -36,12 +36,18 @@ struct stmmac_resources {
int tx_irq[MTL_MAX_TX_QUEUES];
 };
 
+enum stmmac_txbuf_type {
+   STMMAC_TXBUF_T_SKB,
+   STMMAC_TXBUF_T_XDP_TX,
+};
+
 struct stmmac_tx_info {
dma_addr_t buf;
bool map_as_page;
unsigned len;
bool last_segment;
bool is_jumbo;
+   enum stmmac_txbuf_type buf_type;
 };
 
 #define STMMAC_TBS_AVAIL   BIT(0)
@@ -57,7 +63,10 @@ struct stmmac_tx_queue {
struct dma_extended_desc *dma_etx cacheline_aligned_in_smp;
struct dma_edesc *dma_entx;
struct dma_desc *dma_tx;
-   struct sk_buff **tx_skbuff;
+   union {
+   struct sk_buff **tx_skbuff;
+   struct xdp_frame **xdpf;
+   };
struct stmmac_tx_info *tx_skbuff_dma;
unsigned int cur_tx;
unsigned int dirty_tx;
@@ -77,6 +86,7 @@ struct stmmac_rx_buffer {
 struct stmmac_rx_queue {
u32 rx_count_frames;
u32 queue_index;
+   struct xdp_rxq_info xdp_rxq;
struct page_pool *page_pool;
struct stmmac_rx_buffer *buf_pool;
struct stmmac_priv *priv_data;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c 
b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 0dad8ab93eb5..35e9a738d663 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -71,6 +71,7 @@ MODULE_PARM_DESC(phyaddr, "Physical device address");
 
 #define STMMAC_XDP_PASS0
 #define STMMAC_XDP_CONSUMEDBIT(0)
+#define STMMAC_XDP_TX  BIT(1)
 
 static int flow_ctrl = FLOW_AUTO;
 module_param(flow_ctrl, int, 0644);
@@ -1442,7 +1443,8 @@ static void stmmac_free_tx_buffer(struct stmmac_priv 
*priv, u32 queue, int i)
 {
	struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
 
-   if (tx_q->tx_skbuff_dma[i].buf) {
+   if (tx_q->tx_skbuff_dma[i].buf &&
+   tx_q->tx_skbuff_dma[i].buf_type != STMMAC_TXBUF_T_XDP_TX) {
if (tx_q->tx_skbuff_dma[i].map_as_page)
dma_unmap_page(priv->device,
   tx_q->tx_skbuff_dma[i].buf,
@@ -1455,12 +1457,20 @@ static void stmmac_free_tx_buffer(struct stmmac_priv 
*priv, u32 queue, int i)
 DMA_TO_DEVICE);
}
 
-   if (tx_q->tx_skbuff[i]) {
+   if (tx_q->xdpf[i] &&
+   tx_q->tx_skbuff_dma[i].buf_type == STMMAC_TXBUF_T_XDP_TX) {
+   xdp_return_frame(tx_q->xdpf[i]);
+   tx_q->xdpf[i] = NULL;
+   }
+
+   if (tx_q->tx_skbuff[i] &&
+   tx_q->tx_skbuff_dma[i].buf_type == STMMAC_TXBUF_T_SKB) {
dev_kfree_skb_any(tx_q->tx_skbuff[i]);
tx_q->tx_skbuff[i] = NULL;
-   tx_q->tx_skbuff_dma[i].buf = 0;
-   tx_q->tx_skbuff_dma[i].map_as_page = false;
}
+
+   tx_q->tx_skbuff_dma[i].buf = 0;
+   tx_q->tx_skbuff_dma[i].map_as_page = false;
 }
 
 /**
@@ -1568,6 +1578,7 @@ static int init_dma_rx_desc_rings(struct net_device *dev, 
gfp_t flags)
 
for (queue = 0; queue < rx_count; queue++) {
		struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+   int ret;
 
netif_dbg(priv, probe, priv->dev,
  "(%s) dma_rx_phy=0x%08x\n", __func__,
@@ -1575,6 +1586,14 @@ static int init_dma_rx_desc_rings(struct net_device 
*dev, gfp_t flags)
 
stmmac_clear_rx_descriptors(priv, queue);
 
+   WARN_ON(xdp_rxq_info_reg_mem_model(&rx_q->xdp_rxq,
+  MEM_TYPE_PAGE_POOL,
+  rx_q->page_pool));
+
+   netdev_info(priv->dev,
+   "Register MEM_TYPE_PAGE_POOL RxQ-%d\n",
+   rx_q->queue_index);
+
for (i = 0; i < priv->dma_rx_size; i++) {
struct dma_desc *p;
 
@@ -1775,6 +1794,9 @@ static void free_dma_rx_desc_resources(struct stmmac_priv 
*priv)