Move the next pointer to the first cacheline of the rte_mbuf structure
and move the offload values to the second cacheline to give better
performance to applications using chained mbufs.

Enabled by the configuration option CONFIG_RTE_MBUF_CHAIN_FRIENDLY, which
defaults to n (disabled).

Signed-off-by: Keith Wiles <keith.wiles@intel.com>
---
 config/common_base                                 |  2 +
 .../linuxapp/eal/include/exec-env/rte_kni_common.h |  8 +++
 lib/librte_mbuf/rte_mbuf.h                         | 67 +++++++++++++++-------
 3 files changed, 56 insertions(+), 21 deletions(-)

diff --git a/config/common_base b/config/common_base
index 379a791..f7c624e 100644
--- a/config/common_base
+++ b/config/common_base
@@ -405,6 +405,8 @@ CONFIG_RTE_LIBRTE_MBUF_DEBUG=n
 CONFIG_RTE_MBUF_DEFAULT_MEMPOOL_OPS="ring_mp_mc"
 CONFIG_RTE_MBUF_REFCNT_ATOMIC=y
 CONFIG_RTE_PKTMBUF_HEADROOM=128
+# Set to y if needing to be mbuf chain friendly.
+CONFIG_RTE_MBUF_CHAIN_FRIENDLY=n

 #
 # Compile librte_timer
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
index 2acdfd9..44d65cd 100644
--- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
@@ -120,11 +120,19 @@ struct rte_kni_mbuf {
        char pad2[4];
        uint32_t pkt_len;       /**< Total pkt len: sum of all segment data_len. */
        uint16_t data_len;      /**< Amount of data in segment buffer. */
+#ifdef RTE_MBUF_CHAIN_FRIENDLY
+       char pad3[8];
+       void *next;

        /* fields on second cache line */
+       char pad4[16] __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
+       void *pool;
+#else
+       /* fields on second cache line */
        char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
        void *pool;
        void *next;
+#endif
 };

 /*
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 15e3a10..6e6ba0e 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -765,6 +765,28 @@ typedef uint8_t  MARKER8[0];  /**< generic marker with 1B alignment */
 typedef uint64_t MARKER64[0]; /**< marker that allows us to overwrite 8 bytes
                                * with a single assignment */

+typedef union {
+       uint32_t rss;     /**< RSS hash result if RSS enabled */
+       struct {
+               union {
+                       struct {
+                               uint16_t hash;
+                               uint16_t id;
+                       };
+                       uint32_t lo;
+                       /**< Second 4 flexible bytes */
+               };
+               uint32_t hi;
+               /**< First 4 flexible bytes or FD ID, dependent on
+                       PKT_RX_FDIR_* flag in ol_flags. */
+       } fdir;           /**< Filter identifier if FDIR enabled */
+       struct {
+               uint32_t lo;
+               uint32_t hi;
+       } sched;          /**< Hierarchical scheduler */
+       uint32_t usr;     /**< User defined tags. See rte_distributor_process() */
+} rss_hash_t;
+
 /**
  * The generic rte_mbuf, containing a packet mbuf.
  */
@@ -824,28 +846,31 @@ struct rte_mbuf {
        uint16_t data_len;        /**< Amount of data in segment buffer. */
        /** VLAN TCI (CPU order), valid if PKT_RX_VLAN_STRIPPED is set. */
        uint16_t vlan_tci;
+#ifdef RTE_MBUF_CHAIN_FRIENDLY
+       /*
+        * Move offload into the second cache line and next in the first.
+        * Better performance for applications using chained mbufs to have
+        * the next pointer in the first cache line.
+        * If you change this structure, you must change the user-mode
+        * version in rte_kni_common.h to match the new layout.
+        */
+       uint32_t seqn; /**< Sequence number. See also rte_reorder_insert() */
+       uint16_t vlan_tci_outer;  /**< Outer VLAN Tag Control Identifier (CPU order) */
+       struct rte_mbuf *next;    /**< Next segment of scattered packet. */
+
+       /* second cache line - fields only used in slow path or on TX */
+       MARKER cacheline1 __rte_cache_min_aligned;
+
+       rss_hash_t hash;      /**< hash information */

        union {
-               uint32_t rss;     /**< RSS hash result if RSS enabled */
-               struct {
-                       union {
-                               struct {
-                                       uint16_t hash;
-                                       uint16_t id;
-                               };
-                               uint32_t lo;
-                               /**< Second 4 flexible bytes */
-                       };
-                       uint32_t hi;
-                       /**< First 4 flexible bytes or FD ID, dependent on
-                            PKT_RX_FDIR_* flag in ol_flags. */
-               } fdir;           /**< Filter identifier if FDIR enabled */
-               struct {
-                       uint32_t lo;
-                       uint32_t hi;
-               } sched;          /**< Hierarchical scheduler */
-                       uint32_t usr;     /**< User defined tags. See rte_distributor_process() */
-       } hash;                   /**< hash information */
+               void *userdata;   /**< Can be used for external metadata */
+               uint64_t udata64; /**< Allow 8-byte userdata on 32-bit */
+       };
+
+       struct rte_mempool *pool; /**< Pool from which mbuf was allocated. */
+#else
+       rss_hash_t hash;      /**< hash information */

        uint32_t seqn; /**< Sequence number. See also rte_reorder_insert() */

@@ -862,7 +887,7 @@ struct rte_mbuf {

        struct rte_mempool *pool; /**< Pool from which mbuf was allocated. */
        struct rte_mbuf *next;    /**< Next segment of scattered packet. */
-
+#endif
        /* fields to support TX offloads */
        union {
                uint64_t tx_offload;       /**< combined for easy fetch */
-- 
2.8.0.GIT

Reply via email to