Reorganize structure fields for better cache locality.
Remove the RX software ring (sw_ring) to reduce memory allocation and
copying.

Signed-off-by: Junlong Wang <[email protected]>
---
 drivers/net/zxdh/zxdh_ethdev.c |  33 +--------
 drivers/net/zxdh/zxdh_pci.c    |   2 +-
 drivers/net/zxdh/zxdh_queue.c  |  11 ++-
 drivers/net/zxdh/zxdh_queue.h  | 120 ++++++++++++++++-----------------
 drivers/net/zxdh/zxdh_rxtx.c   |  22 +++---
 5 files changed, 77 insertions(+), 111 deletions(-)

diff --git a/drivers/net/zxdh/zxdh_ethdev.c b/drivers/net/zxdh/zxdh_ethdev.c
index aeb01f4652..08119e28c7 100644
--- a/drivers/net/zxdh/zxdh_ethdev.c
+++ b/drivers/net/zxdh/zxdh_ethdev.c
@@ -644,7 +644,6 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t 
vtpci_logic_qidx)
        struct zxdh_virtnet_tx *txvq = NULL;
        struct zxdh_virtqueue *vq = NULL;
        size_t sz_hdr_mz = 0;
-       void *sw_ring = NULL;
        int32_t queue_type = zxdh_get_queue_type(vtpci_logic_qidx);
        int32_t numa_node = dev->device->numa_node;
        uint16_t vtpci_phy_qidx = 0;
@@ -692,11 +691,10 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t 
vtpci_logic_qidx)
        vq->vq_queue_index = vtpci_phy_qidx;
        vq->vq_nentries = vq_size;
 
-       vq->vq_packed.used_wrap_counter = 1;
-       vq->vq_packed.cached_flags = ZXDH_VRING_PACKED_DESC_F_AVAIL;
-       vq->vq_packed.event_flags_shadow = 0;
+       vq->used_wrap_counter = 1;
+       vq->cached_flags = ZXDH_VRING_PACKED_DESC_F_AVAIL;
        if (queue_type == ZXDH_VTNET_RQ)
-               vq->vq_packed.cached_flags |= ZXDH_VRING_DESC_F_WRITE;
+               vq->cached_flags |= ZXDH_VRING_DESC_F_WRITE;
 
        /*
         * Reserve a memzone for vring elements
@@ -741,16 +739,6 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t 
vtpci_logic_qidx)
        }
 
        if (queue_type == ZXDH_VTNET_RQ) {
-               size_t sz_sw = (ZXDH_MBUF_BURST_SZ + vq_size) * 
sizeof(vq->sw_ring[0]);
-
-               sw_ring = rte_zmalloc_socket("sw_ring", sz_sw, 
RTE_CACHE_LINE_SIZE, numa_node);
-               if (!sw_ring) {
-                       PMD_DRV_LOG(ERR, "can not allocate RX soft ring");
-                       ret = -ENOMEM;
-                       goto fail_q_alloc;
-               }
-
-               vq->sw_ring = sw_ring;
                rxvq = &vq->rxq;
                rxvq->vq = vq;
                rxvq->port_id = dev->data->port_id;
@@ -764,23 +752,9 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t 
vtpci_logic_qidx)
                txvq->zxdh_net_hdr_mem = hdr_mz->iova;
        }
 
-       vq->offset = offsetof(struct rte_mbuf, buf_iova);
        if (queue_type == ZXDH_VTNET_TQ) {
                struct zxdh_tx_region *txr = hdr_mz->addr;
-               uint32_t i;
-
                memset(txr, 0, vq_size * sizeof(*txr));
-               for (i = 0; i < vq_size; i++) {
-                       /* first indirect descriptor is always the tx header */
-                       struct zxdh_vring_packed_desc *start_dp = 
txr[i].tx_packed_indir;
-
-                       zxdh_vring_desc_init_indirect_packed(start_dp,
-                                       RTE_DIM(txr[i].tx_packed_indir));
-                       start_dp->addr = txvq->zxdh_net_hdr_mem + i * 
sizeof(*txr) +
-                                       offsetof(struct zxdh_tx_region, tx_hdr);
-                       /* length will be updated to actual pi hdr size when 
xmit pkt */
-                       start_dp->len = 0;
-               }
        }
        if (ZXDH_VTPCI_OPS(hw)->setup_queue(hw, vq) < 0) {
                PMD_DRV_LOG(ERR, "setup_queue failed");
@@ -788,7 +762,6 @@ zxdh_init_queue(struct rte_eth_dev *dev, uint16_t 
vtpci_logic_qidx)
        }
        return 0;
 fail_q_alloc:
-       rte_free(sw_ring);
        rte_memzone_free(hdr_mz);
        rte_memzone_free(mz);
        rte_free(vq);
diff --git a/drivers/net/zxdh/zxdh_pci.c b/drivers/net/zxdh/zxdh_pci.c
index 4ba31905fc..0bc27ed111 100644
--- a/drivers/net/zxdh/zxdh_pci.c
+++ b/drivers/net/zxdh/zxdh_pci.c
@@ -231,7 +231,7 @@ zxdh_notify_queue(struct zxdh_hw *hw, struct zxdh_virtqueue 
*vq)
 
        notify_data = ((uint32_t)vq->vq_avail_idx << 16) | vq->vq_queue_index;
        if (zxdh_pci_with_feature(hw, ZXDH_F_RING_PACKED) &&
-                       (vq->vq_packed.cached_flags & 
ZXDH_VRING_PACKED_DESC_F_AVAIL))
+                       (vq->cached_flags & ZXDH_VRING_PACKED_DESC_F_AVAIL))
                notify_data |= RTE_BIT32(31);
 
        PMD_DRV_LOG(DEBUG, "queue:%d notify_data 0x%x notify_addr 0x%p",
diff --git a/drivers/net/zxdh/zxdh_queue.c b/drivers/net/zxdh/zxdh_queue.c
index 7162593b16..4668cb5d13 100644
--- a/drivers/net/zxdh/zxdh_queue.c
+++ b/drivers/net/zxdh/zxdh_queue.c
@@ -407,7 +407,7 @@ int32_t zxdh_enqueue_recv_refill_packed(struct 
zxdh_virtqueue *vq,
 {
        struct zxdh_vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
        struct zxdh_vq_desc_extra *dxp;
-       uint16_t flags = vq->vq_packed.cached_flags;
+       uint16_t flags = vq->cached_flags;
        int32_t i;
        uint16_t idx;
 
@@ -415,7 +415,6 @@ int32_t zxdh_enqueue_recv_refill_packed(struct 
zxdh_virtqueue *vq,
                idx = vq->vq_avail_idx;
                dxp = &vq->vq_descx[idx];
                dxp->cookie = (void *)cookie[i];
-               dxp->ndescs = 1;
                /* rx pkt fill in data_off */
                start_dp[idx].addr = rte_mbuf_iova_get(cookie[i]) + 
RTE_PKTMBUF_HEADROOM;
                start_dp[idx].len = cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM;
@@ -423,8 +422,8 @@ int32_t zxdh_enqueue_recv_refill_packed(struct 
zxdh_virtqueue *vq,
                zxdh_queue_store_flags_packed(&start_dp[idx], flags);
                if (++vq->vq_avail_idx >= vq->vq_nentries) {
                        vq->vq_avail_idx -= vq->vq_nentries;
-                       vq->vq_packed.cached_flags ^= 
ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
-                       flags = vq->vq_packed.cached_flags;
+                       vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+                       flags = vq->cached_flags;
                }
        }
        vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
@@ -467,7 +466,7 @@ void zxdh_queue_rxvq_flush(struct zxdh_virtqueue *vq)
        int32_t cnt = 0;
 
        i = vq->vq_used_cons_idx;
-       while (zxdh_desc_used(&descs[i], vq) && cnt++ < vq->vq_nentries) {
+       while (desc_is_used(&descs[i], vq) && cnt++ < vq->vq_nentries) {
                dxp = &vq->vq_descx[descs[i].id];
                if (dxp->cookie != NULL) {
                        rte_pktmbuf_free(dxp->cookie);
@@ -477,7 +476,7 @@ void zxdh_queue_rxvq_flush(struct zxdh_virtqueue *vq)
                vq->vq_used_cons_idx++;
                if (vq->vq_used_cons_idx >= vq->vq_nentries) {
                        vq->vq_used_cons_idx -= vq->vq_nentries;
-                       vq->vq_packed.used_wrap_counter ^= 1;
+                       vq->used_wrap_counter ^= 1;
                }
                i = vq->vq_used_cons_idx;
        }
diff --git a/drivers/net/zxdh/zxdh_queue.h b/drivers/net/zxdh/zxdh_queue.h
index 1a0c8a0d90..b079272162 100644
--- a/drivers/net/zxdh/zxdh_queue.h
+++ b/drivers/net/zxdh/zxdh_queue.h
@@ -9,6 +9,7 @@
 
 #include <rte_common.h>
 #include <rte_atomic.h>
+#include <rte_io.h>
 
 #include "zxdh_ethdev.h"
 #include "zxdh_rxtx.h"
@@ -117,7 +118,6 @@ struct zxdh_vring_packed_desc_event {
 };
 
 struct zxdh_vring_packed {
-       uint32_t num;
        struct zxdh_vring_packed_desc *desc;
        struct zxdh_vring_packed_desc_event *driver;
        struct zxdh_vring_packed_desc_event *device;
@@ -129,50 +129,59 @@ struct zxdh_vq_desc_extra {
        uint16_t next;
 };
 
+struct zxdh_vring {
+       uint32_t num;
+       struct zxdh_vring_desc  *desc;
+       struct zxdh_vring_avail *avail;
+       struct zxdh_vring_used  *used;
+};
+
 struct zxdh_virtqueue {
+       union {
+               struct {
+                       struct zxdh_vring ring; /**< vring keeping desc, used 
and avail */
+               } vq_split;
+               struct __rte_packed_begin {
+                       struct zxdh_vring_packed ring;
+               } __rte_packed_end vq_packed;
+       };
        struct zxdh_hw  *hw; /* < zxdh_hw structure pointer. */
 
-       struct {
-               /* vring keeping descs and events */
-               struct zxdh_vring_packed ring;
-               uint8_t used_wrap_counter;
-               uint8_t rsv;
-               uint16_t cached_flags; /* < cached flags for descs */
-               uint16_t event_flags_shadow;
-               uint16_t rsv1;
-       } vq_packed;
-
-       uint16_t vq_used_cons_idx; /* < last consumed descriptor */
-       uint16_t vq_nentries;  /* < vring desc numbers */
-       uint16_t vq_free_cnt;  /* < num of desc available */
-       uint16_t vq_avail_idx; /* < sync until needed */
-       uint16_t vq_free_thresh; /* < free threshold */
-       uint16_t rsv2;
-
-       void *vq_ring_virt_mem;  /* < linear address of vring */
-       uint32_t vq_ring_size;
+       uint16_t vq_used_cons_idx; /**< last consumed descriptor */
+       uint16_t vq_avail_idx; /**< sync until needed */
+       uint16_t vq_nentries;  /**< vring desc numbers */
+       uint16_t vq_free_cnt;  /**< num of desc available */
+
+       uint16_t cached_flags; /**< cached flags for descs */
+       uint8_t used_wrap_counter;
+       uint8_t rsv;
+       uint16_t vq_free_thresh; /**< free threshold */
+       uint16_t next_qidx;
+
+       void *notify_addr;
 
        union {
                struct zxdh_virtnet_rx rxq;
                struct zxdh_virtnet_tx txq;
        };
 
-       /*
-        * physical address of vring, or virtual address
-        */
-       rte_iova_t vq_ring_mem;
+       uint16_t vq_queue_index; /* PACKED: phy_idx, SPLIT: logic_idx */
+       uint16_t event_flags_shadow;
+       uint32_t vq_ring_size;
 
-       /*
+       /**
         * Head of the free chain in the descriptor table. If
         * there are no free descriptors, this will be set to
         * VQ_RING_DESC_CHAIN_END.
-        */
+        **/
        uint16_t  vq_desc_head_idx;
        uint16_t  vq_desc_tail_idx;
-       uint16_t  vq_queue_index;   /* < PCI queue index */
-       uint16_t  offset; /* < relative offset to obtain addr in mbuf */
-       uint16_t *notify_addr;
-       struct rte_mbuf **sw_ring;  /* < RX software ring. */
+       uint32_t rsv_8B;
+
+       void *vq_ring_virt_mem;  /**< linear address of vring*/
+       /* physical address of vring, or virtual address for virtio_user. */
+       rte_iova_t vq_ring_mem;
+
        struct zxdh_vq_desc_extra vq_descx[];
 };
 
@@ -296,10 +305,9 @@ static inline void
 zxdh_vring_init_packed(struct zxdh_vring_packed *vr, uint8_t *p,
                unsigned long align, uint32_t num)
 {
-       vr->num    = num;
        vr->desc   = (struct zxdh_vring_packed_desc *)p;
        vr->driver = (struct zxdh_vring_packed_desc_event *)(p +
-                                vr->num * sizeof(struct 
zxdh_vring_packed_desc));
+                                num * sizeof(struct zxdh_vring_packed_desc));
        vr->device = (struct zxdh_vring_packed_desc_event 
*)RTE_ALIGN_CEIL(((uintptr_t)vr->driver +
                                 sizeof(struct zxdh_vring_packed_desc_event)), 
align);
 }
@@ -331,30 +339,21 @@ zxdh_vring_desc_init_indirect_packed(struct 
zxdh_vring_packed_desc *dp, int32_t
 static inline void
 zxdh_queue_disable_intr(struct zxdh_virtqueue *vq)
 {
-       if (vq->vq_packed.event_flags_shadow != ZXDH_RING_EVENT_FLAGS_DISABLE) {
-               vq->vq_packed.event_flags_shadow = 
ZXDH_RING_EVENT_FLAGS_DISABLE;
-               vq->vq_packed.ring.driver->desc_event_flags = 
vq->vq_packed.event_flags_shadow;
+       if (vq->event_flags_shadow != ZXDH_RING_EVENT_FLAGS_DISABLE) {
+               vq->event_flags_shadow = ZXDH_RING_EVENT_FLAGS_DISABLE;
+               vq->vq_packed.ring.driver->desc_event_flags = 
vq->event_flags_shadow;
        }
 }
 
 static inline void
 zxdh_queue_enable_intr(struct zxdh_virtqueue *vq)
 {
-       if (vq->vq_packed.event_flags_shadow == ZXDH_RING_EVENT_FLAGS_DISABLE) {
-               vq->vq_packed.event_flags_shadow = 
ZXDH_RING_EVENT_FLAGS_DISABLE;
-               vq->vq_packed.ring.driver->desc_event_flags = 
vq->vq_packed.event_flags_shadow;
+       if (vq->event_flags_shadow != ZXDH_RING_EVENT_FLAGS_ENABLE) {
+               vq->event_flags_shadow = ZXDH_RING_EVENT_FLAGS_ENABLE;
+               vq->vq_packed.ring.driver->desc_event_flags = 
vq->event_flags_shadow;
        }
 }
 
-static inline void
-zxdh_mb(uint8_t weak_barriers)
-{
-       if (weak_barriers)
-               rte_atomic_thread_fence(rte_memory_order_seq_cst);
-       else
-               rte_mb();
-}
-
 static inline
 int32_t desc_is_used(struct zxdh_vring_packed_desc *desc, struct 
zxdh_virtqueue *vq)
 {
@@ -365,7 +364,7 @@ int32_t desc_is_used(struct zxdh_vring_packed_desc *desc, 
struct zxdh_virtqueue
        rte_io_rmb();
        used = !!(flags & ZXDH_VRING_PACKED_DESC_F_USED);
        avail = !!(flags & ZXDH_VRING_PACKED_DESC_F_AVAIL);
-       return avail == used && used == vq->vq_packed.used_wrap_counter;
+       return avail == used && used == vq->used_wrap_counter;
 }
 
 static inline int32_t
@@ -381,22 +380,17 @@ zxdh_queue_store_flags_packed(struct 
zxdh_vring_packed_desc *dp, uint16_t flags)
        dp->flags = flags;
 }
 
-static inline int32_t
-zxdh_desc_used(struct zxdh_vring_packed_desc *desc, struct zxdh_virtqueue *vq)
-{
-       uint16_t flags;
-       uint16_t used, avail;
-
-       flags = desc->flags;
-       rte_io_rmb();
-       used = !!(flags & ZXDH_VRING_PACKED_DESC_F_USED);
-       avail = !!(flags & ZXDH_VRING_PACKED_DESC_F_AVAIL);
-       return avail == used && used == vq->vq_packed.used_wrap_counter;
-}
-
 static inline void zxdh_queue_notify(struct zxdh_virtqueue *vq)
 {
-       ZXDH_VTPCI_OPS(vq->hw)->notify_queue(vq->hw, vq);
+       /* Bit[0:15]: vq queue index
+        * Bit[16:30]: avail index
+        * Bit[31]: avail wrap counter
+        */
+       uint32_t notify_data = ((uint32_t)(!!(vq->cached_flags &
+               ZXDH_VRING_PACKED_DESC_F_AVAIL)) << 31) |
+               ((uint32_t)vq->vq_avail_idx << 16) |
+               vq->vq_queue_index;
+       rte_write32(notify_data, vq->notify_addr);
 }
 
 static inline int32_t
@@ -404,7 +398,7 @@ zxdh_queue_kick_prepare_packed(struct zxdh_virtqueue *vq)
 {
        uint16_t flags = 0;
 
-       zxdh_mb(1);
+       rte_mb();
        flags = vq->vq_packed.ring.device->desc_event_flags;
 
        return (flags != ZXDH_RING_EVENT_FLAGS_DISABLE);
diff --git a/drivers/net/zxdh/zxdh_rxtx.c b/drivers/net/zxdh/zxdh_rxtx.c
index db86922aea..93506a4b49 100644
--- a/drivers/net/zxdh/zxdh_rxtx.c
+++ b/drivers/net/zxdh/zxdh_rxtx.c
@@ -216,7 +216,7 @@ zxdh_xmit_cleanup_inorder_packed(struct zxdh_virtqueue *vq, 
int32_t num)
        /* desc_is_used has a load-acquire or rte_io_rmb inside
         * and wait for used desc in virtqueue.
         */
-       while (num > 0 && zxdh_desc_used(&desc[used_idx], vq)) {
+       while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
                id = desc[used_idx].id;
                do {
                        curr_id = used_idx;
@@ -226,7 +226,7 @@ zxdh_xmit_cleanup_inorder_packed(struct zxdh_virtqueue *vq, 
int32_t num)
                        num -= dxp->ndescs;
                        if (used_idx >= size) {
                                used_idx -= size;
-                               vq->vq_packed.used_wrap_counter ^= 1;
+                               vq->used_wrap_counter ^= 1;
                        }
                        if (dxp->cookie != NULL) {
                                rte_pktmbuf_free(dxp->cookie);
@@ -340,7 +340,7 @@ zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
        struct zxdh_virtqueue *vq = txvq->vq;
        uint16_t id = vq->vq_avail_idx;
        struct zxdh_vq_desc_extra *dxp = &vq->vq_descx[id];
-       uint16_t flags = vq->vq_packed.cached_flags;
+       uint16_t flags = vq->cached_flags;
        struct zxdh_net_hdr_dl *hdr = NULL;
        uint8_t hdr_len = vq->hw->dl_net_hdr_len;
        struct zxdh_vring_packed_desc *dp = &vq->vq_packed.ring.desc[id];
@@ -355,7 +355,7 @@ zxdh_enqueue_xmit_packed_fast(struct zxdh_virtnet_tx *txvq,
        dp->id   = id;
        if (++vq->vq_avail_idx >= vq->vq_nentries) {
                vq->vq_avail_idx -= vq->vq_nentries;
-               vq->vq_packed.cached_flags ^= 
ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+               vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
        }
        vq->vq_free_cnt--;
        zxdh_queue_store_flags_packed(dp, flags);
@@ -381,7 +381,7 @@ zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
 
        dxp->ndescs = needed;
        dxp->cookie = cookie;
-       head_flags |= vq->vq_packed.cached_flags;
+       head_flags |= vq->cached_flags;
 
        start_dp[idx].addr = txvq->zxdh_net_hdr_mem + 
RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
        start_dp[idx].len  = hdr_len;
@@ -392,7 +392,7 @@ zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
        idx++;
        if (idx >= vq->vq_nentries) {
                idx -= vq->vq_nentries;
-               vq->vq_packed.cached_flags ^= 
ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+               vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
        }
 
        zxdh_xmit_fill_net_hdr(vq, cookie, hdr);
@@ -404,14 +404,14 @@ zxdh_enqueue_xmit_packed(struct zxdh_virtnet_tx *txvq,
                if (likely(idx != head_idx)) {
                        uint16_t flags = cookie->next ? ZXDH_VRING_DESC_F_NEXT 
: 0;
 
-                       flags |= vq->vq_packed.cached_flags;
+                       flags |= vq->cached_flags;
                        start_dp[idx].flags = flags;
                }
 
                idx++;
                if (idx >= vq->vq_nentries) {
                        idx -= vq->vq_nentries;
-                       vq->vq_packed.cached_flags ^= 
ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
+                       vq->cached_flags ^= ZXDH_VRING_PACKED_DESC_F_AVAIL_USED;
                }
        } while ((cookie = cookie->next) != NULL);
 
@@ -480,7 +480,7 @@ zxdh_xmit_flush(struct zxdh_virtqueue *vq)
                        free_cnt += dxp->ndescs;
                        if (used_idx >= size) {
                                used_idx -= size;
-                               vq->vq_packed.used_wrap_counter ^= 1;
+                               vq->used_wrap_counter ^= 1;
                        }
                        if (dxp->cookie != NULL) {
                                rte_pktmbuf_free(dxp->cookie);
@@ -619,7 +619,7 @@ zxdh_dequeue_burst_rx_packed(struct zxdh_virtqueue *vq,
                 * desc_is_used has a load-acquire or rte_io_rmb inside
                 * and wait for used desc in virtqueue.
                 */
-               if (!zxdh_desc_used(&desc[used_idx], vq))
+               if (!desc_is_used(&desc[used_idx], vq))
                        return i;
                len[i] = desc[used_idx].len;
                id = desc[used_idx].id;
@@ -637,7 +637,7 @@ zxdh_dequeue_burst_rx_packed(struct zxdh_virtqueue *vq,
                vq->vq_used_cons_idx++;
                if (vq->vq_used_cons_idx >= vq->vq_nentries) {
                        vq->vq_used_cons_idx -= vq->vq_nentries;
-                       vq->vq_packed.used_wrap_counter ^= 1;
+                       vq->used_wrap_counter ^= 1;
                }
        }
        return i;
-- 
2.27.0

Reply via email to