From: Long Wu <long...@corigine.com>

This follows the mainline Linux kernel commit
0dcf7f500b0a (nfp: use TX ring pointer write back) by Jakub Kicinski.

This will speed up Tx completions, because we avoid a read from
device memory (replacing PCIe read with DMA read), and it works on
CoreNIC firmware with NFDk.

Signed-off-by: Long Wu <long...@corigine.com>
Reviewed-by: Chaoyong He <chaoyong...@corigine.com>
---
 drivers/net/nfp/nfdk/nfp_nfdk_dp.c |  7 ++++++
 drivers/net/nfp/nfp_ethdev.c       | 16 +++++++++++++-
 drivers/net/nfp/nfp_net_common.c   | 34 ++++++++++++++++++++++++++++++
 drivers/net/nfp/nfp_net_common.h   |  5 +++++
 drivers/net/nfp/nfp_rxtx.c         | 28 +++++++++++++++++++++++-
 drivers/net/nfp/nfp_rxtx.h         |  6 ++++++
 6 files changed, 94 insertions(+), 2 deletions(-)

diff --git a/drivers/net/nfp/nfdk/nfp_nfdk_dp.c b/drivers/net/nfp/nfdk/nfp_nfdk_dp.c
index 1911736e2b..41cdfd3a40 100644
--- a/drivers/net/nfp/nfdk/nfp_nfdk_dp.c
+++ b/drivers/net/nfp/nfdk/nfp_nfdk_dp.c
@@ -544,6 +544,13 @@ nfp_net_nfdk_tx_queue_setup(struct rte_eth_dev *dev,
                return -ENOMEM;
        }
 
+       if (hw->txrwb_mz != NULL) {
+               txq->txrwb = (uint64_t *)hw->txrwb_mz->addr + queue_idx;
+               txq->txrwb_dma = (uint64_t)hw->txrwb_mz->iova +
+                               queue_idx * sizeof(uint64_t);
+               nn_cfg_writeq(&hw->super, NFP_NET_CFG_TXR_WB_ADDR(queue_idx), txq->txrwb_dma);
+       }
+
        nfp_net_reset_tx_queue(txq);
 
        dev->data->tx_queues[queue_idx] = txq;
diff --git a/drivers/net/nfp/nfp_ethdev.c b/drivers/net/nfp/nfp_ethdev.c
index 568de1d024..b711e15b9f 100644
--- a/drivers/net/nfp/nfp_ethdev.c
+++ b/drivers/net/nfp/nfp_ethdev.c
@@ -360,6 +360,9 @@ nfp_net_start(struct rte_eth_dev *dev)
        if ((hw->cap & NFP_NET_CFG_CTRL_RINGCFG) != 0)
                new_ctrl |= NFP_NET_CFG_CTRL_RINGCFG;
 
+       if ((hw->cap & NFP_NET_CFG_CTRL_TXRWB) != 0)
+               new_ctrl |= NFP_NET_CFG_CTRL_TXRWB;
+
        if (nfp_reconfig(hw, new_ctrl, update) != 0)
                return -EIO;
 
@@ -577,6 +580,8 @@ nfp_net_uninit(struct rte_eth_dev *eth_dev)
                nfp_net_flow_priv_uninit(net_hw->pf_dev, net_hw->idx);
 
        rte_free(net_hw->eth_xstats_base);
+       if ((net_hw->super.cap & NFP_NET_CFG_CTRL_TXRWB) != 0)
+               nfp_net_txrwb_free(eth_dev);
        nfp_ipsec_uninit(eth_dev);
        if (net_hw->mac_stats_area != NULL)
                nfp_cpp_area_release_free(net_hw->mac_stats_area);
@@ -987,6 +992,12 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
                goto xstats_free;
        }
 
+       if ((hw->cap & NFP_NET_CFG_CTRL_TXRWB) != 0) {
+               err = nfp_net_txrwb_alloc(eth_dev);
+               if (err != 0)
+                       goto xstats_free;
+       }
+
        nfp_net_pf_read_mac(app_fw_nic, port);
        nfp_write_mac(hw, &hw->mac_addr.addr_bytes[0]);
 
@@ -1025,12 +1036,15 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
                err = nfp_net_flow_priv_init(pf_dev, port);
                if (err != 0) {
                        PMD_INIT_LOG(ERR, "Init net flow priv failed");
-                       goto xstats_free;
+                       goto txrwb_free;
                }
        }
 
        return 0;
 
+txrwb_free:
+       if ((hw->cap & NFP_NET_CFG_CTRL_TXRWB) != 0)
+               nfp_net_txrwb_free(eth_dev);
 xstats_free:
        rte_free(net_hw->eth_xstats_base);
 ipsec_exit:
diff --git a/drivers/net/nfp/nfp_net_common.c b/drivers/net/nfp/nfp_net_common.c
index c6889949ff..da1a7e7be1 100644
--- a/drivers/net/nfp/nfp_net_common.c
+++ b/drivers/net/nfp/nfp_net_common.c
@@ -2039,6 +2039,40 @@ nfp_net_check_dma_mask(struct nfp_net_hw *hw,
        return 0;
 }
 
+int
+nfp_net_txrwb_alloc(struct rte_eth_dev *eth_dev)
+{
+       struct nfp_net_hw *net_hw;
+       char mz_name[RTE_MEMZONE_NAMESIZE];
+
+       net_hw = nfp_net_get_hw(eth_dev);
+       snprintf(mz_name, sizeof(mz_name), "%s_TXRWB", eth_dev->data->name);
+       net_hw->txrwb_mz = rte_memzone_reserve_aligned(mz_name,
+                       net_hw->max_tx_queues * sizeof(uint64_t),
+                       rte_socket_id(),
+                       RTE_MEMZONE_IOVA_CONTIG, RTE_CACHE_LINE_SIZE);
+       if (net_hw->txrwb_mz == NULL) {
+               PMD_INIT_LOG(ERR, "Failed to alloc %s for TX ring write back",
+                               mz_name);
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+void
+nfp_net_txrwb_free(struct rte_eth_dev *eth_dev)
+{
+       struct nfp_net_hw *net_hw;
+
+       net_hw = nfp_net_get_hw(eth_dev);
+       if (net_hw->txrwb_mz == NULL)
+               return;
+
+       rte_memzone_free(net_hw->txrwb_mz);
+       net_hw->txrwb_mz = NULL;
+}
+
 void
 nfp_net_cfg_read_version(struct nfp_net_hw *hw)
 {
diff --git a/drivers/net/nfp/nfp_net_common.h b/drivers/net/nfp/nfp_net_common.h
index 49a5a84044..8066e77e6f 100644
--- a/drivers/net/nfp/nfp_net_common.h
+++ b/drivers/net/nfp/nfp_net_common.h
@@ -168,6 +168,9 @@ struct nfp_net_hw {
        /** Backpointer to the eth_dev of this port */
        struct rte_eth_dev *eth_dev;
 
+       /** TX pointer ring write back memzone */
+       const struct rte_memzone *txrwb_mz;
+
        /** Info from the firmware */
        struct nfp_net_fw_ver ver;
        uint32_t max_mtu;
@@ -321,6 +324,8 @@ int nfp_net_fec_set(struct rte_eth_dev *dev,
                uint32_t fec_capa);
 void nfp_net_get_fw_version(struct nfp_net_hw *hw,
                uint32_t *fw_version);
+int nfp_net_txrwb_alloc(struct rte_eth_dev *eth_dev);
+void nfp_net_txrwb_free(struct rte_eth_dev *eth_dev);
 
 #define NFP_PRIV_TO_APP_FW_NIC(app_fw_priv)\
        ((struct nfp_app_fw_nic *)app_fw_priv)
diff --git a/drivers/net/nfp/nfp_rxtx.c b/drivers/net/nfp/nfp_rxtx.c
index 1aee3ecb3f..f9c4636688 100644
--- a/drivers/net/nfp/nfp_rxtx.c
+++ b/drivers/net/nfp/nfp_rxtx.c
@@ -695,6 +695,26 @@ nfp_net_rx_queue_setup(struct rte_eth_dev *dev,
        return 0;
 }
 
+static inline uint32_t
+nfp_net_read_tx_free_qcp(struct nfp_net_txq *txq)
+{
+       /*
+        * If TX ring pointer write back is not supported, do a PCIe read.
+        * Otherwise read qcp value from write back dma address.
+        */
+       if (txq->txrwb == NULL)
+               return nfp_qcp_read(txq->qcp_q, NFP_QCP_READ_PTR);
+
+       /*
+        * In most cases the TX count is a power of two and the costly modulus
+        * operation can be substituted with a subtraction and an AND operation.
+        */
+       if (rte_is_power_of_2(txq->tx_count) == 1)
+               return (*txq->txrwb) & (txq->tx_count - 1);
+       else
+               return (*txq->txrwb) % txq->tx_count;
+}
+
 /**
  * Check for descriptors with a complete status
  *
@@ -714,7 +734,7 @@ nfp_net_tx_free_bufs(struct nfp_net_txq *txq)
                        " status", txq->qidx);
 
        /* Work out how many packets have been sent */
-       qcp_rd_p = nfp_qcp_read(txq->qcp_q, NFP_QCP_READ_PTR);
+       qcp_rd_p = nfp_net_read_tx_free_qcp(txq);
 
        if (qcp_rd_p == txq->rd_p) {
                PMD_TX_LOG(DEBUG, "queue %hu: It seems harrier is not sending "
@@ -761,9 +781,13 @@ void
 nfp_net_tx_queue_release(struct rte_eth_dev *dev,
                uint16_t queue_idx)
 {
+       struct nfp_net_hw *net_hw;
        struct nfp_net_txq *txq = dev->data->tx_queues[queue_idx];
 
        if (txq != NULL) {
+               net_hw = nfp_net_get_hw(dev);
+               if (net_hw->txrwb_mz != NULL)
+                       nn_cfg_writeq(&net_hw->super, NFP_NET_CFG_TXR_WB_ADDR(queue_idx), 0);
                nfp_net_tx_queue_release_mbufs(txq);
                rte_eth_dma_zone_free(dev, "tx_ring", queue_idx);
                rte_free(txq->txbufs);
@@ -777,6 +801,8 @@ nfp_net_reset_tx_queue(struct nfp_net_txq *txq)
        nfp_net_tx_queue_release_mbufs(txq);
        txq->wr_p = 0;
        txq->rd_p = 0;
+       if (txq->txrwb != NULL)
+               *txq->txrwb = 0;
 }
 
 int
diff --git a/drivers/net/nfp/nfp_rxtx.h b/drivers/net/nfp/nfp_rxtx.h
index 6ecabc232c..f463b9cf75 100644
--- a/drivers/net/nfp/nfp_rxtx.h
+++ b/drivers/net/nfp/nfp_rxtx.h
@@ -77,6 +77,12 @@ struct nfp_net_txq {
         * in a cache line.
         */
        uint64_t dma;
+
+       /** TX pointer ring write back area (indexed by queue id) */
+       uint64_t *txrwb;
+
+       /** TX pointer ring write back area DMA address */
+       uint64_t txrwb_dma;
 } __rte_aligned(64);
 
 /* RX and freelist descriptor format */
-- 
2.39.1

Reply via email to