From: Long Wu <long...@corigine.com> This follows the mainline Linux kernel commit 0dcf7f500b0a ("nfp: use TX ring pointer write back") by Jakub Kicinski.
This will speed up Tx completions, because we avoid a read from device memory (replacing PCIe read with DMA read), and it works on CoreNIC firmware with NFDk. Signed-off-by: Long Wu <long...@corigine.com> Reviewed-by: Chaoyong He <chaoyong...@corigine.com> --- drivers/net/nfp/nfdk/nfp_nfdk_dp.c | 7 ++++++ drivers/net/nfp/nfp_ethdev.c | 16 +++++++++++++- drivers/net/nfp/nfp_net_common.c | 34 ++++++++++++++++++++++++++++++ drivers/net/nfp/nfp_net_common.h | 5 +++++ drivers/net/nfp/nfp_rxtx.c | 28 +++++++++++++++++++++++- drivers/net/nfp/nfp_rxtx.h | 6 ++++++ 6 files changed, 94 insertions(+), 2 deletions(-) diff --git a/drivers/net/nfp/nfdk/nfp_nfdk_dp.c b/drivers/net/nfp/nfdk/nfp_nfdk_dp.c index 1911736e2b..41cdfd3a40 100644 --- a/drivers/net/nfp/nfdk/nfp_nfdk_dp.c +++ b/drivers/net/nfp/nfdk/nfp_nfdk_dp.c @@ -544,6 +544,13 @@ nfp_net_nfdk_tx_queue_setup(struct rte_eth_dev *dev, return -ENOMEM; } + if (hw->txrwb_mz != NULL) { + txq->txrwb = (uint64_t *)hw->txrwb_mz->addr + queue_idx; + txq->txrwb_dma = (uint64_t)hw->txrwb_mz->iova + + queue_idx * sizeof(uint64_t); + nn_cfg_writeq(&hw->super, NFP_NET_CFG_TXR_WB_ADDR(queue_idx), txq->txrwb_dma); + } + nfp_net_reset_tx_queue(txq); dev->data->tx_queues[queue_idx] = txq; diff --git a/drivers/net/nfp/nfp_ethdev.c b/drivers/net/nfp/nfp_ethdev.c index 568de1d024..b711e15b9f 100644 --- a/drivers/net/nfp/nfp_ethdev.c +++ b/drivers/net/nfp/nfp_ethdev.c @@ -360,6 +360,9 @@ nfp_net_start(struct rte_eth_dev *dev) if ((hw->cap & NFP_NET_CFG_CTRL_RINGCFG) != 0) new_ctrl |= NFP_NET_CFG_CTRL_RINGCFG; + if ((hw->cap & NFP_NET_CFG_CTRL_TXRWB) != 0) + new_ctrl |= NFP_NET_CFG_CTRL_TXRWB; + if (nfp_reconfig(hw, new_ctrl, update) != 0) return -EIO; @@ -577,6 +580,8 @@ nfp_net_uninit(struct rte_eth_dev *eth_dev) nfp_net_flow_priv_uninit(net_hw->pf_dev, net_hw->idx); rte_free(net_hw->eth_xstats_base); + if ((net_hw->super.cap & NFP_NET_CFG_CTRL_TXRWB) != 0) + nfp_net_txrwb_free(eth_dev); nfp_ipsec_uninit(eth_dev); if (net_hw->mac_stats_area 
!= NULL) nfp_cpp_area_release_free(net_hw->mac_stats_area); @@ -987,6 +992,12 @@ nfp_net_init(struct rte_eth_dev *eth_dev) goto xstats_free; } + if ((hw->cap & NFP_NET_CFG_CTRL_TXRWB) != 0) { + err = nfp_net_txrwb_alloc(eth_dev); + if (err != 0) + goto xstats_free; + } + nfp_net_pf_read_mac(app_fw_nic, port); nfp_write_mac(hw, &hw->mac_addr.addr_bytes[0]); @@ -1025,12 +1036,15 @@ nfp_net_init(struct rte_eth_dev *eth_dev) err = nfp_net_flow_priv_init(pf_dev, port); if (err != 0) { PMD_INIT_LOG(ERR, "Init net flow priv failed"); - goto xstats_free; + goto txrwb_free; } } return 0; +txrwb_free: + if ((hw->cap & NFP_NET_CFG_CTRL_TXRWB) != 0) + nfp_net_txrwb_free(eth_dev); xstats_free: rte_free(net_hw->eth_xstats_base); ipsec_exit: diff --git a/drivers/net/nfp/nfp_net_common.c b/drivers/net/nfp/nfp_net_common.c index c6889949ff..da1a7e7be1 100644 --- a/drivers/net/nfp/nfp_net_common.c +++ b/drivers/net/nfp/nfp_net_common.c @@ -2039,6 +2039,40 @@ nfp_net_check_dma_mask(struct nfp_net_hw *hw, return 0; } +int +nfp_net_txrwb_alloc(struct rte_eth_dev *eth_dev) +{ + struct nfp_net_hw *net_hw; + char mz_name[RTE_MEMZONE_NAMESIZE]; + + net_hw = nfp_net_get_hw(eth_dev); + snprintf(mz_name, sizeof(mz_name), "%s_TXRWB", eth_dev->data->name); + net_hw->txrwb_mz = rte_memzone_reserve_aligned(mz_name, + net_hw->max_tx_queues * sizeof(uint64_t), + rte_socket_id(), + RTE_MEMZONE_IOVA_CONTIG, RTE_CACHE_LINE_SIZE); + if (net_hw->txrwb_mz == NULL) { + PMD_INIT_LOG(ERR, "Failed to alloc %s for TX ring write back", + mz_name); + return -ENOMEM; + } + + return 0; +} + +void +nfp_net_txrwb_free(struct rte_eth_dev *eth_dev) +{ + struct nfp_net_hw *net_hw; + + net_hw = nfp_net_get_hw(eth_dev); + if (net_hw->txrwb_mz == NULL) + return; + + rte_memzone_free(net_hw->txrwb_mz); + net_hw->txrwb_mz = NULL; +} + void nfp_net_cfg_read_version(struct nfp_net_hw *hw) { diff --git a/drivers/net/nfp/nfp_net_common.h b/drivers/net/nfp/nfp_net_common.h index 49a5a84044..8066e77e6f 100644 --- 
a/drivers/net/nfp/nfp_net_common.h +++ b/drivers/net/nfp/nfp_net_common.h @@ -168,6 +168,9 @@ struct nfp_net_hw { /** Backpointer to the eth_dev of this port */ struct rte_eth_dev *eth_dev; + /** TX pointer ring write back memzone */ + const struct rte_memzone *txrwb_mz; + /** Info from the firmware */ struct nfp_net_fw_ver ver; uint32_t max_mtu; @@ -321,6 +324,8 @@ int nfp_net_fec_set(struct rte_eth_dev *dev, uint32_t fec_capa); void nfp_net_get_fw_version(struct nfp_net_hw *hw, uint32_t *fw_version); +int nfp_net_txrwb_alloc(struct rte_eth_dev *eth_dev); +void nfp_net_txrwb_free(struct rte_eth_dev *eth_dev); #define NFP_PRIV_TO_APP_FW_NIC(app_fw_priv)\ ((struct nfp_app_fw_nic *)app_fw_priv) diff --git a/drivers/net/nfp/nfp_rxtx.c b/drivers/net/nfp/nfp_rxtx.c index 1aee3ecb3f..f9c4636688 100644 --- a/drivers/net/nfp/nfp_rxtx.c +++ b/drivers/net/nfp/nfp_rxtx.c @@ -695,6 +695,26 @@ nfp_net_rx_queue_setup(struct rte_eth_dev *dev, return 0; } +static inline uint32_t +nfp_net_read_tx_free_qcp(struct nfp_net_txq *txq) +{ + /* + * If TX ring pointer write back is not supported, do a PCIe read. + * Otherwise read qcp value from write back dma address. + */ + if (txq->txrwb == NULL) + return nfp_qcp_read(txq->qcp_q, NFP_QCP_READ_PTR); + + /* + * In most cases the TX count is a power of two and the costly modulus + * operation can be substituted with a subtraction and an AND operation. 
+ */ + if (rte_is_power_of_2(txq->tx_count) == 1) + return (*txq->txrwb) & (txq->tx_count - 1); + else + return (*txq->txrwb) % txq->tx_count; +} + /** * Check for descriptors with a complete status * @@ -714,7 +734,7 @@ nfp_net_tx_free_bufs(struct nfp_net_txq *txq) " status", txq->qidx); /* Work out how many packets have been sent */ - qcp_rd_p = nfp_qcp_read(txq->qcp_q, NFP_QCP_READ_PTR); + qcp_rd_p = nfp_net_read_tx_free_qcp(txq); if (qcp_rd_p == txq->rd_p) { PMD_TX_LOG(DEBUG, "queue %hu: It seems harrier is not sending " @@ -761,9 +781,13 @@ void nfp_net_tx_queue_release(struct rte_eth_dev *dev, uint16_t queue_idx) { + struct nfp_net_hw *net_hw; struct nfp_net_txq *txq = dev->data->tx_queues[queue_idx]; if (txq != NULL) { + net_hw = nfp_net_get_hw(dev); + if (net_hw->txrwb_mz != NULL) + nn_cfg_writeq(&net_hw->super, NFP_NET_CFG_TXR_WB_ADDR(queue_idx), 0); nfp_net_tx_queue_release_mbufs(txq); rte_eth_dma_zone_free(dev, "tx_ring", queue_idx); rte_free(txq->txbufs); @@ -777,6 +801,8 @@ nfp_net_reset_tx_queue(struct nfp_net_txq *txq) nfp_net_tx_queue_release_mbufs(txq); txq->wr_p = 0; txq->rd_p = 0; + if (txq->txrwb != NULL) + *txq->txrwb = 0; } int diff --git a/drivers/net/nfp/nfp_rxtx.h b/drivers/net/nfp/nfp_rxtx.h index 6ecabc232c..f463b9cf75 100644 --- a/drivers/net/nfp/nfp_rxtx.h +++ b/drivers/net/nfp/nfp_rxtx.h @@ -77,6 +77,12 @@ struct nfp_net_txq { * in a cache line. */ uint64_t dma; + + /** TX pointer ring write back area (indexed by queue id) */ + uint64_t *txrwb; + + /** TX pointer ring write back area DMA address */ + uint64_t txrwb_dma; } __rte_aligned(64); /* RX and freelist descriptor format */ -- 2.39.1