Add support for the RTE_ETH_RX_OFFLOAD_TIMESTAMP Rx offload. When the
offload is enabled, register the mbuf dynamic field/flag for Rx
timestamps at queue init, read the 32-bit timestamp from the Rx
descriptor in both the single-queue and split-queue receive paths, and
extend it to a 64-bit nanosecond value against the main-timer shadow
time registers.

Signed-off-by: Wenjing Qiao <wenjing.q...@intel.com>
Signed-off-by: Junfeng Guo <junfeng....@intel.com>
---
 drivers/net/idpf/idpf_ethdev.h |  3 ++
 drivers/net/idpf/idpf_rxtx.c   | 79 ++++++++++++++++++++++++++++++
 drivers/net/idpf/idpf_rxtx.h   | 89 +++++++++++++++++++++++++++++++++-
 3 files changed, 170 insertions(+), 1 deletion(-)
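Note (not part of the applied patch): the offload plugs into the generic
mbuf dynamic-field API, so no PMD-specific API is exposed. Below is a
minimal sketch of application-side usage; the helper names are
illustrative and port/queue setup and error handling are elided.

	#include <stdio.h>
	#include <inttypes.h>
	#include <rte_ethdev.h>
	#include <rte_mbuf_dyn.h>

	/* Offset/flag of the shared Rx timestamp dynamic field, set at init. */
	static int ts_off = -1;
	static uint64_t ts_flag;

	static int
	enable_rx_timestamp(uint16_t port_id, struct rte_eth_conf *conf)
	{
		/* Request the offload before configuring the port. */
		conf->rxmode.offloads |= RTE_ETH_RX_OFFLOAD_TIMESTAMP;
		if (rte_eth_dev_configure(port_id, 1, 1, conf) < 0)
			return -1;
		/* Registration is idempotent: the application gets the same
		 * offset/flag the PMD registers in idpf_register_ts_mbuf(). */
		return rte_mbuf_dyn_rx_timestamp_register(&ts_off, &ts_flag);
	}

	static void
	print_rx_timestamp(const struct rte_mbuf *m)
	{
		if ((m->ol_flags & ts_flag) != 0)
			printf("rx ts: %" PRIu64 " ns\n",
			       *RTE_MBUF_DYNFIELD(m, ts_off, rte_mbuf_timestamp_t *));
	}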
diff --git a/drivers/net/idpf/idpf_ethdev.h b/drivers/net/idpf/idpf_ethdev.h
index a32d5758ac..968e0e3cbf 100644
--- a/drivers/net/idpf/idpf_ethdev.h
+++ b/drivers/net/idpf/idpf_ethdev.h
@@ -184,6 +184,9 @@ struct idpf_adapter {
 	bool tx_vec_allowed;
 	bool rx_use_avx512;
 	bool tx_use_avx512;
+
+	/* For PTP */
+	uint64_t time_hw;
 };
 
 TAILQ_HEAD(idpf_adapter_list, idpf_adapter);
diff --git a/drivers/net/idpf/idpf_rxtx.c b/drivers/net/idpf/idpf_rxtx.c
index e31d202646..b0037eca08 100644
--- a/drivers/net/idpf/idpf_rxtx.c
+++ b/drivers/net/idpf/idpf_rxtx.c
@@ -10,6 +10,8 @@
 #include "idpf_rxtx.h"
 #include "idpf_rxtx_vec_common.h"
 
+static int idpf_timestamp_dynfield_offset = -1;
+
 const uint32_t *
 idpf_dev_supported_ptypes_get(struct rte_eth_dev *dev __rte_unused)
 {
@@ -965,6 +967,24 @@ idpf_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 				    socket_id, tx_conf);
 }
 
+static int
+idpf_register_ts_mbuf(struct idpf_rx_queue *rxq)
+{
+	int err;
+	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
+		/* Register mbuf field and flag for Rx timestamp */
+		err = rte_mbuf_dyn_rx_timestamp_register(
+				&idpf_timestamp_dynfield_offset,
+				&idpf_timestamp_dynflag);
+		if (err) {
+			PMD_DRV_LOG(ERR,
+				    "Cannot register mbuf field/flag for timestamp");
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
 static int
 idpf_alloc_single_rxq_mbufs(struct idpf_rx_queue *rxq)
 {
@@ -992,6 +1012,10 @@ idpf_alloc_single_rxq_mbufs(struct idpf_rx_queue *rxq)
 		rxd = &((volatile struct virtchnl2_singleq_rx_buf_desc *)(rxq->rx_ring))[i];
 		rxd->pkt_addr = dma_addr;
 		rxd->hdr_addr = 0;
+#ifndef RTE_LIBRTE_IDPF_16BYTE_RX_DESC
+		rxd->rsvd1 = 0;
+		rxd->rsvd2 = 0;
+#endif
 		rxq->sw_ring[i] = mbuf;
 	}
 
@@ -1057,6 +1081,13 @@ idpf_rx_queue_init(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 		return -EINVAL;
 	}
 
+	err = idpf_register_ts_mbuf(rxq);
+	if (err) {
+		PMD_DRV_LOG(ERR, "fail to register timestamp mbuf %u",
+			    rx_queue_id);
+		return -EIO;
+	}
+
 	if (!rxq->bufq1) {
 		/* Single queue */
 		err = idpf_alloc_single_rxq_mbufs(rxq);
@@ -1441,6 +1472,12 @@ idpf_splitq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	nb_rx = 0;
 	rxq = (struct idpf_rx_queue *)rx_queue;
 
+#ifndef RTE_LIBRTE_IDPF_16BYTE_RX_DESC
+	uint64_t ts_ns;
+	struct iecm_hw *hw = &rxq->adapter->hw;
+	struct idpf_adapter *ad = rxq->adapter;
+#endif
+
 	if (unlikely(!rxq) || unlikely(!rxq->q_started))
 		return nb_rx;
 
@@ -1451,6 +1488,11 @@ idpf_splitq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		(volatile struct virtchnl2_rx_flex_desc_adv_nic_3 *)rxq->rx_ring;
 	ptype_tbl = rxq->adapter->ptype_tbl;
 
+#ifndef RTE_LIBRTE_IDPF_16BYTE_RX_DESC
+	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
+		rxq->hw_register_set = 1;
+#endif
+
 	while (nb_rx < nb_pkts) {
 		rx_desc = &rx_desc_ring[rx_id];
 
@@ -1507,6 +1549,19 @@ idpf_splitq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		status_err0_qw1 = rx_desc->status_err0_qw1;
 		pkt_flags = idpf_splitq_rx_csum_offload(status_err0_qw1);
 		pkt_flags |= idpf_splitq_rx_rss_offload(rxm, rx_desc);
+#ifndef RTE_LIBRTE_IDPF_16BYTE_RX_DESC
+		if (idpf_timestamp_dynflag > 0) {
+			/* timestamp */
+			ts_ns = idpf_tstamp_convert_32b_64b(hw, ad,
+					rxq->hw_register_set,
+					rte_le_to_cpu_32(rx_desc->ts_high));
+			rxq->hw_register_set = 0;
+			*RTE_MBUF_DYNFIELD(rxm,
+					   idpf_timestamp_dynfield_offset,
+					   rte_mbuf_timestamp_t *) = ts_ns;
+			rxm->ol_flags |= idpf_timestamp_dynflag;
+		}
+#endif
 		rxm->ol_flags |= pkt_flags;
 
 		rx_pkts[nb_rx++] = rxm;
@@ -1778,6 +1833,10 @@ idpf_singleq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	nb_hold = 0;
 	rxq = rx_queue;
 
+#ifndef RTE_LIBRTE_IDPF_16BYTE_RX_DESC
+	uint64_t ts_ns;
+#endif
+
 	if (unlikely(!rxq) || unlikely(!rxq->q_started))
 		return nb_rx;
 
@@ -1785,6 +1844,13 @@ idpf_singleq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	rx_ring = rxq->rx_ring;
 	ptype_tbl = rxq->adapter->ptype_tbl;
 
+#ifndef RTE_LIBRTE_IDPF_16BYTE_RX_DESC
+	struct iecm_hw *hw = &rxq->adapter->hw;
+	struct idpf_adapter *ad = rxq->adapter;
+	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
+		rxq->hw_register_set = 1;
+#endif
+
 	while (nb_rx < nb_pkts) {
 		rxdp = &rx_ring[rx_id];
 		rx_status0 = rte_le_to_cpu_16(rxdp->flex_nic_wb.status_error0);
@@ -1841,6 +1907,19 @@ idpf_singleq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 		rxm->packet_type =
 			ptype_tbl[(uint8_t)(rte_cpu_to_le_16(rxd.flex_nic_wb.ptype_flex_flags0) &
 					    VIRTCHNL2_RX_FLEX_DESC_PTYPE_M)];
+#ifndef RTE_LIBRTE_IDPF_16BYTE_RX_DESC
+		if (idpf_timestamp_dynflag > 0) {
+			/* timestamp */
+			ts_ns = idpf_tstamp_convert_32b_64b(hw, ad,
+					rxq->hw_register_set,
+					rte_le_to_cpu_32(rxdp->flex_nic_wb.flex_ts.ts_high));
+			rxq->hw_register_set = 0;
+			*RTE_MBUF_DYNFIELD(rxm,
+					   idpf_timestamp_dynfield_offset,
+					   rte_mbuf_timestamp_t *) = ts_ns;
+			rxm->ol_flags |= idpf_timestamp_dynflag;
+		}
+#endif
 		rx_pkts[nb_rx++] = rxm;
 	}
 	rxq->rx_tail = rx_id;
diff --git a/drivers/net/idpf/idpf_rxtx.h b/drivers/net/idpf/idpf_rxtx.h
index decd0a98c2..6fcb441143 100644
--- a/drivers/net/idpf/idpf_rxtx.h
+++ b/drivers/net/idpf/idpf_rxtx.h
@@ -15,6 +15,41 @@
 #include "base/virtchnl2_lan_desc.h"
 #include "idpf_ethdev.h"
 
+/* MTS */
+#define GLTSYN_CMD_SYNC_0_0	(PF_TIMESYNC_BASE + 0x0)
+#define PF_GLTSYN_SHTIME_0_0	(PF_TIMESYNC_BASE + 0x4)
+#define PF_GLTSYN_SHTIME_L_0	(PF_TIMESYNC_BASE + 0x8)
+#define PF_GLTSYN_SHTIME_H_0	(PF_TIMESYNC_BASE + 0xC)
+#define GLTSYN_ART_L_0		(PF_TIMESYNC_BASE + 0x10)
+#define GLTSYN_ART_H_0		(PF_TIMESYNC_BASE + 0x14)
+#define PF_GLTSYN_SHTIME_0_1	(PF_TIMESYNC_BASE + 0x24)
+#define PF_GLTSYN_SHTIME_L_1	(PF_TIMESYNC_BASE + 0x28)
+#define PF_GLTSYN_SHTIME_H_1	(PF_TIMESYNC_BASE + 0x2C)
+#define PF_GLTSYN_SHTIME_0_2	(PF_TIMESYNC_BASE + 0x44)
+#define PF_GLTSYN_SHTIME_L_2	(PF_TIMESYNC_BASE + 0x48)
+#define PF_GLTSYN_SHTIME_H_2	(PF_TIMESYNC_BASE + 0x4C)
+#define PF_GLTSYN_SHTIME_0_3	(PF_TIMESYNC_BASE + 0x64)
+#define PF_GLTSYN_SHTIME_L_3	(PF_TIMESYNC_BASE + 0x68)
+#define PF_GLTSYN_SHTIME_H_3	(PF_TIMESYNC_BASE + 0x6C)
+
+#define PF_TIMESYNC_BAR4_BASE	0x0E400000
+#define GLTSYN_ENA		(PF_TIMESYNC_BAR4_BASE + 0x90)
+#define GLTSYN_CMD		(PF_TIMESYNC_BAR4_BASE + 0x94)
+#define GLTSYC_TIME_L		(PF_TIMESYNC_BAR4_BASE + 0x104)
+#define GLTSYC_TIME_H		(PF_TIMESYNC_BAR4_BASE + 0x108)
+
+#define GLTSYN_CMD_SYNC_0_4	(PF_TIMESYNC_BAR4_BASE + 0x110)
+#define PF_GLTSYN_SHTIME_L_4	(PF_TIMESYNC_BAR4_BASE + 0x118)
+#define PF_GLTSYN_SHTIME_H_4	(PF_TIMESYNC_BAR4_BASE + 0x11C)
+#define GLTSYN_INCVAL_L		(PF_TIMESYNC_BAR4_BASE + 0x150)
+#define GLTSYN_INCVAL_H		(PF_TIMESYNC_BAR4_BASE + 0x154)
+#define GLTSYN_SHADJ_L		(PF_TIMESYNC_BAR4_BASE + 0x158)
+#define GLTSYN_SHADJ_H		(PF_TIMESYNC_BAR4_BASE + 0x15C)
+
+#define GLTSYN_CMD_SYNC_0_5	(PF_TIMESYNC_BAR4_BASE + 0x130)
+#define PF_GLTSYN_SHTIME_L_5	(PF_TIMESYNC_BAR4_BASE + 0x138)
+#define PF_GLTSYN_SHTIME_H_5	(PF_TIMESYNC_BAR4_BASE + 0x13C)
+
 /* In QLEN must be whole number of 32 descriptors. */
 #define IDPF_ALIGN_RING_DESC	32
 #define IDPF_MIN_RING_DESC	32
@@ -66,6 +101,8 @@
 	(sizeof(struct virtchnl2_ptype) + \
 	 (((p)->proto_id_count ? ((p)->proto_id_count - 1) : 0) * sizeof((p)->proto_id[0])))
 
+extern uint64_t idpf_timestamp_dynflag;
+
 struct idpf_rx_queue {
 	struct idpf_adapter *adapter;	/* the adapter this queue belongs to */
 	struct rte_mempool *mp;		/* mbuf pool to populate Rx ring */
@@ -231,5 +268,55 @@ void idpf_set_tx_function(struct rte_eth_dev *dev);
 
 const uint32_t *idpf_dev_supported_ptypes_get(struct rte_eth_dev *dev);
 
-#endif /* _IDPF_RXTX_H_ */
+#define IDPF_TIMESYNC_REG_WRAP_GUARD_BAND	10000
+/* Helper function to convert a 32b nanoseconds timestamp to 64b. */
+static inline uint64_t
+idpf_tstamp_convert_32b_64b(struct iecm_hw *hw, struct idpf_adapter *ad,
+			    uint32_t flag, uint32_t in_timestamp)
+{
+/* TODO: timestamp for ACC */
+#ifdef RTE_ARCH_ARM64
+	return 0;
+#endif /* RTE_ARCH_ARM64 */
+
+#ifdef RTE_ARCH_X86_64
+	const uint64_t mask = 0xFFFFFFFF;
+	uint32_t hi, lo, lo2, delta;
+	uint64_t ns;
+
+	if (flag) {
+		IECM_WRITE_REG(hw, GLTSYN_CMD_SYNC_0_0, PF_GLTSYN_CMD_SYNC_SHTIME_EN_M);
+		IECM_WRITE_REG(hw, GLTSYN_CMD_SYNC_0_0, PF_GLTSYN_CMD_SYNC_EXEC_CMD_M |
+			       PF_GLTSYN_CMD_SYNC_SHTIME_EN_M);
+		lo = IECM_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
+		hi = IECM_READ_REG(hw, PF_GLTSYN_SHTIME_H_0);
+		/*
+		 * On a typical system, the delta between lo and lo2 is ~1000ns,
+		 * so 10000 seems a large-enough but not overly-big guard band.
+		 */
+		if (lo > (UINT32_MAX - IDPF_TIMESYNC_REG_WRAP_GUARD_BAND))
+			lo2 = IECM_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
+		else
+			lo2 = lo;
+
+		if (lo2 < lo) {
+			lo = IECM_READ_REG(hw, PF_GLTSYN_SHTIME_L_0);
+			hi = IECM_READ_REG(hw, PF_GLTSYN_SHTIME_H_0);
+		}
+
+		ad->time_hw = ((uint64_t)hi << 32) | lo;
+	}
+
+	delta = (in_timestamp - (uint32_t)(ad->time_hw & mask));
+	if (delta > (mask / 2)) {
+		delta = ((uint32_t)(ad->time_hw & mask) - in_timestamp);
+		ns = ad->time_hw - delta;
+	} else {
+		ns = ad->time_hw + delta;
+	}
+
+	return ns;
+#endif /* RTE_ARCH_X86_64 */
+}
+#endif /* _IDPF_RXTX_H_ */
-- 
2.25.1
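
Aside, not part of the patch: the wrap handling in
idpf_tstamp_convert_32b_64b() can be exercised standalone. A minimal
sketch (hypothetical helper name, plain C, no DPDK dependencies) of the
same delta logic, which assumes the 32-bit descriptor timestamp and the
cached 64-bit main-timer value were sampled less than ~2^31 ns apart:

	#include <stdint.h>
	#include <stdio.h>
	#include <inttypes.h>

	/* Extend a 32-bit hardware timestamp to 64 bits against a cached
	 * 64-bit reference (time_hw), mirroring the patch's delta logic. */
	static uint64_t
	tstamp_extend_32b_64b(uint64_t time_hw, uint32_t in_timestamp)
	{
		uint32_t ref_lo = (uint32_t)(time_hw & 0xFFFFFFFFu);
		uint32_t delta = in_timestamp - ref_lo;

		/* If the apparent forward distance exceeds half the 32-bit
		 * range, the timestamp is really behind the reference. */
		if (delta > UINT32_MAX / 2)
			return time_hw - (uint32_t)(ref_lo - in_timestamp);
		return time_hw + delta;
	}

	int main(void)
	{
		/* Reference just below a 32-bit wrap; packet stamped after it. */
		uint64_t ref = 0x00000001FFFFFF00ULL;
		printf("%" PRIx64 "\n", tstamp_extend_32b_64b(ref, 0x00000100));
		/* Prints 200000100: the wrap is carried into the high word. */
		return 0;
	}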