Reorganize structure fields for better cache locality. Remove the RX software ring (sw_ring) to reduce memory allocation and copying.
Signed-off-by: Junlong Wang <[email protected]> --- drivers/net/zxdh/zxdh_ethdev.c | 33 +-------- drivers/net/zxdh/zxdh_pci.c | 2 +- drivers/net/zxdh/zxdh_queue.c | 11 ++- drivers/net/zxdh/zxdh_queue.h | 120 ++++++++++++++++----------------- drivers/net/zxdh/zxdh_rxtx.c | 22 +++--- 5 files changed, 77 insertions(+), 111 deletions(-) diff --git a/drivers/net/zxdh/zxdh_ethdev.c b/drivers/net/zxdh/zxdh_ethdev.c index aeb01f4652..08119e28c7 100644 --- a/drivers/net/zxdh/zxdh_ethdev.c +++ b/drivers/net/zxdh/zxdh_ethdev.c @@ -644,7 +644,6 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx) struct zxdh_virtnet_tx *txvq = NULL; struct zxdh_virtqueue *vq = NULL; size_t sz_hdr_mz = 0; - void *sw_ring = NULL; int32_t queue_type = zxdh_get_queue_type(vtpci_logic_qidx); int32_t numa_node = dev->device->numa_node; uint16_t vtpci_phy_qidx = 0; @@ -692,11 +691,10 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx) vq->vq_queue_index = vtpci_phy_qidx; vq->vq_nentries = vq_size; - vq->vq_packed.used_wrap_counter = 1; - vq->vq_packed.cached_flags = ZXDH_VRING_PACKED_DESC_F_AVAIL; - vq->vq_packed.event_flags_shadow = 0; + vq->used_wrap_counter = 1; + vq->cached_flags = ZXDH_VRING_PACKED_DESC_F_AVAIL; if (queue_type == ZXDH_VTNET_RQ) - vq->vq_packed.cached_flags |= ZXDH_VRING_DESC_F_WRITE; + vq->cached_flags |= ZXDH_VRING_DESC_F_WRITE; /* * Reserve a memzone for vring elements @@ -741,16 +739,6 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx) } if (queue_type == ZXDH_VTNET_RQ) { - size_t sz_sw = (ZXDH_MBUF_BURST_SZ + vq_size) * sizeof(vq->sw_ring[0]); - - sw_ring = rte_zmalloc_socket("sw_ring", sz_sw, RTE_CACHE_LINE_SIZE, numa_node); - if (!sw_ring) { - PMD_DRV_LOG(ERR, "can not allocate RX soft ring"); - ret = -ENOMEM; - goto fail_q_alloc; - } - - vq->sw_ring = sw_ring; rxvq = &vq->rxq; rxvq->vq = vq; rxvq->port_id = dev->data->port_id; @@ -764,23 +752,9 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx) 
txvq->zxdh_net_hdr_mem = hdr_mz->iova; } - vq->offset = offsetof(struct rte_mbuf, buf_iova); if (queue_type == ZXDH_VTNET_TQ) { struct zxdh_tx_region *txr = hdr_mz->addr; - uint32_t i; - memset(txr, 0, vq_size * sizeof(*txr)); - for (i = 0; i < vq_size; i++) { - /* first indirect descriptor is always the tx header */ - struct zxdh_vring_packed_desc *start_dp = txr[i].tx_packed_indir; - - zxdh_vring_desc_init_indirect_packed(start_dp, - RTE_DIM(txr[i].tx_packed_indir)); - start_dp->addr = txvq->zxdh_net_hdr_mem + i * sizeof(*txr) + - offsetof(struct zxdh_tx_region, tx_hdr); - /* length will be updated to actual pi hdr size when xmit pkt */ - start_dp->len = 0; - } } if (ZXDH_VTPCI_OPS(hw)->setup_queue(hw, vq) < 0) { PMD_DRV_LOG(ERR, "setup_queue failed"); @@ -788,7 +762,6 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_logic_qidx) } return 0; fail_q_alloc: - rte_free(sw_ring); rte_memzone_free(hdr_mz); rte_memzone_free(mz); rte_free(vq); diff --git a/drivers/net/zxdh/zxdh_pci.c b/drivers/net/zxdh/zxdh_pci.c index 4ba31905fc..0bc27ed111 100644 --- a/drivers/net/zxdh/zxdh_pci.c +++ b/drivers/net/zxdh/zxdh_pci.c @@ -231,7 +231,7 @@ zxdh_notify_queue(struct zxdh_hw *hw, struct zxdh_virtqueue *vq) notify_data = ((uint32_t)vq->vq_avail_idx << 16) | vq->vq_queue_index; if (zxdh_pci_with_feature(hw, ZXDH_F_RING_PACKED) && - (vq->vq_packed.cached_flags & ZXDH_VRING_PACKED_DESC_F_AVAIL)) + (vq->cached_flags & ZXDH_VRING_PACKED_DESC_F_AVAIL)) notify_data |= RTE_BIT32(31); PMD_DRV_LOG(DEBUG, "queue:%d notify_data 0x%x notify_addr 0x%p", diff --git a/drivers/net/zxdh/zxdh_queue.c b/drivers/net/zxdh/zxdh_queue.c index 7162593b16..4668cb5d13 100644 --- a/drivers/net/zxdh/zxdh_queue.c +++ b/drivers/net/zxdh/zxdh_queue.c @@ -407,7 +407,7 @@ int32_t zxdh_enqueue_recv_refill_packed(struct zxdh_virtqueue *vq, { struct zxdh_vring_packed_desc *start_dp = vq->vq_packed.ring.desc; struct zxdh_vq_desc_extra *dxp; - uint16_t flags = vq->vq_packed.cached_flags; + uint16_t flags = 
vq->cached_flags; int32_t i; uint16_t idx; @@ -415,7 +415,6 @@ int32_t zxdh_enqueue_recv_refill_packed(struct zxdh_virtqueue *vq, idx = vq->vq_avail_idx; dxp = &vq->vq_descx[idx]; dxp->cookie = (void *)cookie[i]; - dxp->ndescs = 1; /* rx pkt fill in data_off */ start_dp[idx].addr = rte_mbuf_iova_get(cookie[i]) + RTE_PKTMBUF_HEADROOM; start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM; @@ -423,8 +422,8 @@ int32_t zxdh_enqueue_recv_refill_packed(struct zxdh_virtqueue *vq, zxdh_queue_store_flags_packed(&start_dp[idx], flags); if (++vq->vq_avail_idx >= vq->vq_nentries) { vq->vq_avail_idx -= vq->vq_nentries; - vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED; - flags = vq->vq_packed.cached_flags; + vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED; + flags = vq->cached_flags; } } vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num); @@ -467,7 +466,7 @@ void zxdh_queue_rxvq_flush(struct zxdh_virtqueue *vq) int32_t cnt = 0; i = vq->vq_used_cons_idx; - while (zxdh_desc_used(&descs[i], vq) && cnt++ < vq->vq_nentries) { + while (desc_is_used(&descs[i], vq) && cnt++ < vq->vq_nentries) { dxp = &vq->vq_descx[descs[i].id]; if (dxp->cookie != NULL) { rte_pktmbuf_free(dxp->cookie); @@ -477,7 +476,7 @@ void zxdh_queue_rxvq_flush(struct zxdh_virtqueue *vq) vq->vq_used_cons_idx++; if (vq->vq_used_cons_idx >= vq->vq_nentries) { vq->vq_used_cons_idx -= vq->vq_nentries; - vq->vq_packed.used_wrap_counter ^= 1; + vq->used_wrap_counter ^= 1; } i = vq->vq_used_cons_idx; } diff --git a/drivers/net/zxdh/zxdh_queue.h b/drivers/net/zxdh/zxdh_queue.h index 1a0c8a0d90..b079272162 100644 --- a/drivers/net/zxdh/zxdh_queue.h +++ b/drivers/net/zxdh/zxdh_queue.h @@ -9,6 +9,7 @@ #include <rte_common.h> #include <rte_atomic.h> +#include <rte_io.h> #include "zxdh_ethdev.h" #include "zxdh_rxtx.h" @@ -117,7 +118,6 @@ struct zxdh_vring_packed_desc_event { }; struct zxdh_vring_packed { - uint32_t num; struct zxdh_vring_packed_desc *desc; struct zxdh_vring_packed_desc_event 
*driver; struct zxdh_vring_packed_desc_event *device; @@ -129,50 +129,59 @@ struct zxdh_vq_desc_extra { uint16_t next; }; +struct zxdh_vring { + uint32_t num; + struct zxdh_vring_desc *desc; + struct zxdh_vring_avail *avail; + struct zxdh_vring_used *used; +}; + struct zxdh_virtqueue { + union { + struct { + struct zxdh_vring ring; /**< vring keeping desc, used and avail */ + } vq_split; + struct __rte_packed_begin { + struct zxdh_vring_packed ring; + } __rte_packed_end vq_packed; + }; struct zxdh_hw *hw; /* < zxdh_hw structure pointer. */ - struct { - /* vring keeping descs and events */ - struct zxdh_vring_packed ring; - uint8_t used_wrap_counter; - uint8_t rsv; - uint16_t cached_flags; /* < cached flags for descs */ - uint16_t event_flags_shadow; - uint16_t rsv1; - } vq_packed; - - uint16_t vq_used_cons_idx; /* < last consumed descriptor */ - uint16_t vq_nentries; /* < vring desc numbers */ - uint16_t vq_free_cnt; /* < num of desc available */ - uint16_t vq_avail_idx; /* < sync until needed */ - uint16_t vq_free_thresh; /* < free threshold */ - uint16_t rsv2; - - void *vq_ring_virt_mem; /* < linear address of vring */ - uint32_t vq_ring_size; + uint16_t vq_used_cons_idx; /**< last consumed descriptor */ + uint16_t vq_avail_idx; /**< sync until needed */ + uint16_t vq_nentries; /**< vring desc numbers */ + uint16_t vq_free_cnt; /**< num of desc available */ + + uint16_t cached_flags; /**< cached flags for descs */ + uint8_t used_wrap_counter; + uint8_t rsv; + uint16_t vq_free_thresh; /**< free threshold */ + uint16_t next_qidx; + + void *notify_addr; union { struct zxdh_virtnet_rx rxq; struct zxdh_virtnet_tx txq; }; - /* - * physical address of vring, or virtual address - */ - rte_iova_t vq_ring_mem; + uint16_t vq_queue_index; /* PACKED: phy_idx, SPLIT: logic_idx */ + uint16_t event_flags_shadow; + uint32_t vq_ring_size; - /* + /** * Head of the free chain in the descriptor table. If * there are no free descriptors, this will be set to * VQ_RING_DESC_CHAIN_END. 
- */ + **/ uint16_t vq_desc_head_idx; uint16_t vq_desc_tail_idx; - uint16_t vq_queue_index; /* < PCI queue index */ - uint16_t offset; /* < relative offset to obtain addr in mbuf */ - uint16_t *notify_addr; - struct rte_mbuf **sw_ring; /* < RX software ring. */ + uint32_t rsv_8B; + + void *vq_ring_virt_mem; /**< linear address of vring*/ + /* physical address of vring, or virtual address for virtio_user. */ + rte_iova_t vq_ring_mem; + struct zxdh_vq_desc_extra vq_descx[]; }; @@ -296,10 +305,9 @@ static inline void zxdh_vring_init_packed(struct zxdh_vring_packed *vr, uint8_t *p, unsigned long align, uint32_t num) { - vr->num = num; vr->desc = (struct zxdh_vring_packed_desc *)p; vr->driver = (struct zxdh_vring_packed_desc_event *)(p + - vr->num * sizeof(struct zxdh_vring_packed_desc)); + num * sizeof(struct zxdh_vring_packed_desc)); vr->device = (struct zxdh_vring_packed_desc_event *)RTE_ALIGN_CEIL(((uintptr_t)vr->driver + sizeof(struct zxdh_vring_packed_desc_event)), align); } @@ -331,30 +339,21 @@ zxdh_vring_desc_init_indirect_packed(struct zxdh_vring_packed_desc *dp, int32_t static inline void zxdh_queue_disable_intr(struct zxdh_virtqueue *vq) { - if (vq->vq_packed.event_flags_shadow != ZXDH_RING_EVENT_FLAGS_DISABLE) { - vq->vq_packed.event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE; - vq->vq_packed.ring.driver->desc_event_flags = vq->vq_packed.event_flags_shadow; + if (vq->event_flags_shadow != ZXDH_RING_EVENT_FLAGS_DISABLE) { + vq->event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE; + vq->vq_packed.ring.driver->desc_event_flags = vq->event_flags_shadow; } } static inline void zxdh_queue_enable_intr(struct zxdh_virtqueue *vq) { - if (vq->vq_packed.event_flags_shadow == ZXDH_RING_EVENT_FLAGS_DISABLE) { - vq->vq_packed.event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE; - vq->vq_packed.ring.driver->desc_event_flags = vq->vq_packed.event_flags_shadow; + if (vq->event_flags_shadow != ZXDH_RING_EVENT_FLAGS_ENABLE) { + vq->event_flags_shadow = 
ZXDH_RING_EVENT_FLAGS_ENABLE; + vq->vq_packed.ring.driver->desc_event_flags = vq->event_flags_shadow; } } -static inline void -zxdh_mb(uint8_t weak_barriers) -{ - if (weak_barriers) - rte_atomic_thread_fence(rte_memory_order_seq_cst); - else - rte_mb(); -} - static inline int32_t desc_is_used(struct zxdh_vring_packed_desc *desc, struct zxdh_virtqueue *vq) { @@ -365,7 +364,7 @@ int32_t desc_is_used(struct zxdh_vring_packed_desc *desc, struct zxdh_virtqueue rte_io_rmb(); used = !!(flags & ZXDH_VRING_PACKED_DESC_F_USED); avail = !!(flags & ZXDH_VRING_PACKED_DESC_F_AVAIL); - return avail == used && used == vq->vq_packed.used_wrap_counter; + return avail == used && used == vq->used_wrap_counter; } static inline int32_t @@ -381,22 +380,17 @@ zxdh_queue_store_flags_packed(struct zxdh_vring_packed_desc *dp, uint16_t flags) dp->flags = flags; } -static inline int32_t -zxdh_desc_used(struct zxdh_vring_packed_desc *desc, struct zxdh_virtqueue *vq) -{ - uint16_t flags; - uint16_t used, avail; - - flags = desc->flags; - rte_io_rmb(); - used = !!(flags & ZXDH_VRING_PACKED_DESC_F_USED); - avail = !!(flags & ZXDH_VRING_PACKED_DESC_F_AVAIL); - return avail == used && used == vq->vq_packed.used_wrap_counter; -} - static inline void zxdh_queue_notify(struct zxdh_virtqueue *vq) { - ZXDH_VTPCI_OPS(vq->hw)->notify_queue(vq->hw, vq); + /* Bit[0:15]: vq queue index + * Bit[16:30]: avail index + * Bit[31]: avail wrap counter + */ + uint32_t notify_data = ((uint32_t)(!!(vq->cached_flags & + ZXDH_VRING_PACKED_DESC_F_AVAIL)) << 31) | + ((uint32_t)vq->vq_avail_idx << 16) | + vq->vq_queue_index; + rte_write32(notify_data, vq->notify_addr); } static inline int32_t @@ -404,7 +398,7 @@ zxdh_queue_kick_prepare_packed(struct zxdh_virtqueue *vq) { uint16_t flags = 0; - zxdh_mb(1); + rte_mb(); flags = vq->vq_packed.ring.device->desc_event_flags; return (flags != ZXDH_RING_EVENT_FLAGS_DISABLE); diff --git a/drivers/net/zxdh/zxdh_rxtx.c b/drivers/net/zxdh/zxdh_rxtx.c index db86922aea..93506a4b49 100644 
--- a/drivers/net/zxdh/zxdh_rxtx.c +++ b/drivers/net/zxdh/zxdh_rxtx.c @@ -216,7 +216,7 @@ zxdh_xmit_cleanup_inorder_packed(struct zxdh_virtqueue *vq, int32_t num) /* desc_is_used has a load-acquire or rte_io_rmb inside * and wait for used desc in virtqueue. */ - while (num > 0 && zxdh_desc_used(&desc[used_idx], vq)) { + while (num > 0 && desc_is_used(&desc[used_idx], vq)) { id = desc[used_idx].id; do { curr_id = used_idx; @@ -226,7 +226,7 @@ zxdh_xmit_cleanup_inorder_packed(struct zxdh_virtqueue *vq, int32_t num) num -= dxp->ndescs; if (used_idx >= size) { used_idx -= size; - vq->vq_packed.used_wrap_counter ^= 1; + vq->used_wrap_counter ^= 1; } if (dxp->cookie != NULL) { rte_pktmbuf_free(dxp->cookie); @@ -340,7 +340,7 @@ zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq, struct zxdh_virtqueue *vq = txvq->vq; uint16_t id = vq->vq_avail_idx; struct zxdh_vq_desc_extra *dxp = &vq->vq_descx[id]; - uint16_t flags = vq->vq_packed.cached_flags; + uint16_t flags = vq->cached_flags; struct zxdh_net_hdr_dl *hdr = NULL; uint8_t hdr_len = vq->hw->dl_net_hdr_len; struct zxdh_vring_packed_desc *dp = &vq->vq_packed.ring.desc[id]; @@ -355,7 +355,7 @@ zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq, dp->id = id; if (++vq->vq_avail_idx >= vq->vq_nentries) { vq->vq_avail_idx -= vq->vq_nentries; - vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED; + vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED; } vq->vq_free_cnt--; zxdh_queue_store_flags_packed(dp, flags); @@ -381,7 +381,7 @@ zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq, dxp->ndescs = needed; dxp->cookie = cookie; - head_flags |= vq->vq_packed.cached_flags; + head_flags |= vq->cached_flags; start_dp[idx].addr = txvq->zxdh_net_hdr_mem + RTE_PTR_DIFF(&txr[idx].tx_hdr, txr); start_dp[idx].len = hdr_len; @@ -392,7 +392,7 @@ zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq, idx++; if (idx >= vq->vq_nentries) { idx -= vq->vq_nentries; - vq->vq_packed.cached_flags ^= 
ZXDH_VRING_PACKED_DESC_F_AVAIL_USED; + vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED; } zxdh_xmit_fill_net_hdr(vq, cookie, hdr); @@ -404,14 +404,14 @@ zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq, if (likely(idx != head_idx)) { uint16_t flags = cookie->next ? ZXDH_VRING_DESC_F_NEXT : 0; - flags |= vq->vq_packed.cached_flags; + flags |= vq->cached_flags; start_dp[idx].flags = flags; } idx++; if (idx >= vq->vq_nentries) { idx -= vq->vq_nentries; - vq->vq_packed.cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED; + vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED; } } while ((cookie = cookie->next) != NULL); @@ -480,7 +480,7 @@ zxdh_xmit_flush(struct zxdh_virtqueue *vq) free_cnt += dxp->ndescs; if (used_idx >= size) { used_idx -= size; - vq->vq_packed.used_wrap_counter ^= 1; + vq->used_wrap_counter ^= 1; } if (dxp->cookie != NULL) { rte_pktmbuf_free(dxp->cookie); @@ -619,7 +619,7 @@ zxdh_dequeue_burst_rx_packed(struct zxdh_virtqueue *vq, * desc_is_used has a load-acquire or rte_io_rmb inside * and wait for used desc in virtqueue. */ - if (!zxdh_desc_used(&desc[used_idx], vq)) + if (!desc_is_used(&desc[used_idx], vq)) return i; len[i] = desc[used_idx].len; id = desc[used_idx].id; @@ -637,7 +637,7 @@ zxdh_dequeue_burst_rx_packed(struct zxdh_virtqueue *vq, vq->vq_used_cons_idx++; if (vq->vq_used_cons_idx >= vq->vq_nentries) { vq->vq_used_cons_idx -= vq->vq_nentries; - vq->vq_packed.used_wrap_counter ^= 1; + vq->used_wrap_counter ^= 1; } } return i; -- 2.27.0

