Reduce the compiled code footprint by de-inlining the unlikely code
paths of the inlined mempool put/get fast-path functions.
Signed-off-by: Morten Brørup <[email protected]>
---
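Note for reviewers: the change applies the usual split between an
always-inlined fast path and a cold, out-of-line slow path that is compiled
once in the library. A minimal, self-contained sketch of that pattern is
below; the names (tiny_pool, tiny_pool_put, tiny_pool_put_slow) are
hypothetical and the plain GCC/Clang attributes stand in for the DPDK
markers, so this only illustrates the technique, not the mempool API.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical pool with a small per-caller object cache (illustration only). */
struct tiny_pool {
	uint32_t cache_len;
	uint32_t cache_size;
	void *cache[8];
};

/* Cold path: compiled once, out of line, shared by all call sites. */
__attribute__((noinline, cold)) static void
tiny_pool_put_slow(struct tiny_pool *p, void * const *objs, uint32_t n)
{
	/* Pretend to flush the cache to the backend ring. */
	printf("slow path: flush %u cached objects\n", p->cache_len);
	p->cache_len = 0;

	if (n <= p->cache_size) {
		/* The request fits an empty cache: keep the objects hot. */
		for (uint32_t i = 0; i < n; i++)
			p->cache[i] = objs[i];
		p->cache_len = n;
	} else {
		/* Too big for the cache: pretend to enqueue directly. */
		printf("slow path: store %u objects in the backend\n", n);
	}
}

/* Hot path: small enough that inlining it at every call site stays cheap. */
static inline void
tiny_pool_put(struct tiny_pool *p, void * const *objs, uint32_t n)
{
	if (__builtin_expect(p->cache_len + n <= p->cache_size, 1)) {
		/* Likely case: enough room in the cache, handled inline. */
		for (uint32_t i = 0; i < n; i++)
			p->cache[p->cache_len + i] = objs[i];
		p->cache_len += n;
		return;
	}
	/* Unlikely case: only a call instruction remains in the caller. */
	tiny_pool_put_slow(p, objs, n);
}

int
main(void)
{
	struct tiny_pool p = { .cache_len = 0, .cache_size = 4 };
	void *objs[8] = { 0 };

	tiny_pool_put(&p, objs, 2);	/* fast path: fits in the cache */
	tiny_pool_put(&p, objs, 3);	/* slow path: cache flushed first */
	tiny_pool_put(&p, objs, 8);	/* slow path: bigger than the cache */
	return 0;
}

With this split, each call site carries only the cache-fits test and a call
instruction for the unlikely branch, which is what shrinks the per-caller
footprint.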
lib/mempool/rte_mempool.c | 153 +++++++++++++++++++++++++++++
lib/mempool/rte_mempool.h | 202 +++++++++++++++++++-------------------
2 files changed, 254 insertions(+), 101 deletions(-)
diff --git a/lib/mempool/rte_mempool.c b/lib/mempool/rte_mempool.c
index 3042d94c14..078d6143c7 100644
--- a/lib/mempool/rte_mempool.c
+++ b/lib/mempool/rte_mempool.c
@@ -1016,6 +1016,118 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
return NULL;
}
+/* internal */
+RTE_EXPORT_INTERNAL_SYMBOL(_rte_mempool_do_generic_put_more)
+void
+_rte_mempool_do_generic_put_more(struct rte_mempool *mp, void * const *obj_table,
+ unsigned int n, struct rte_mempool_cache *cache)
+{
+ __rte_assume(cache->flushthresh <= RTE_MEMPOOL_CACHE_MAX_SIZE * 2);
+ __rte_assume(cache->len <= RTE_MEMPOOL_CACHE_MAX_SIZE * 2);
+ __rte_assume(cache->len <= cache->flushthresh);
+ __rte_assume(cache->len + n > cache->flushthresh);
+ if (likely(n <= cache->flushthresh)) {
+ uint32_t len;
+ void **cache_objs;
+
+ /*
+ * The cache is big enough for the objects, but - as detected by
+	 * rte_mempool_do_generic_put() - has insufficient room for them.
+ * Flush the cache to make room for the objects.
+ */
+ len = cache->len;
+ cache_objs = &cache->objs[0];
+ cache->len = n;
+ rte_mempool_ops_enqueue_bulk(mp, cache_objs, len);
+
+ /* Add the objects to the cache. */
+#if 0 /* Simple alternative to rte_memcpy(). */
+ for (uint32_t index = 0; index < n; index++)
+ *cache_objs++ = *obj_table++;
+#else
+ rte_memcpy(cache_objs, obj_table, sizeof(void *) * n);
+#endif
+
+ return;
+ }
+
+	/* The request itself is too big for the cache. Push objects directly to the backend. */
+ rte_mempool_ops_enqueue_bulk(mp, obj_table, n);
+}
+
+/* internal */
+RTE_EXPORT_INTERNAL_SYMBOL(_rte_mempool_do_generic_get_more)
+int
+_rte_mempool_do_generic_get_more(struct rte_mempool *mp, void **obj_table,
+ unsigned int n, struct rte_mempool_cache *cache)
+{
+ int ret;
+ unsigned int remaining;
+ uint32_t index, len;
+ void **cache_objs;
+
+ /* Use the cache as much as we have to return hot objects first. */
+ __rte_assume(cache->len <= RTE_MEMPOOL_CACHE_MAX_SIZE * 2);
+ len = cache->len;
+ remaining = n - len;
+ cache_objs = &cache->objs[len];
+ cache->len = 0;
+ for (index = 0; index < len; index++)
+ *obj_table++ = *--cache_objs;
+
+ /* Dequeue below would overflow mem allocated for cache? */
+ if (unlikely(remaining > RTE_MEMPOOL_CACHE_MAX_SIZE))
+ goto driver_dequeue;
+
+ /* Fill the cache from the backend; fetch size + remaining objects. */
+ ret = rte_mempool_ops_dequeue_bulk(mp, cache->objs,
+ cache->size + remaining);
+ if (unlikely(ret < 0)) {
+ /*
+ * We are buffer constrained, and not able to fetch all that.
+ * Do not fill the cache, just satisfy the remaining part of
+ * the request directly from the backend.
+ */
+ goto driver_dequeue;
+ }
+
+ /* Satisfy the remaining part of the request from the filled cache. */
+ RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_bulk, 1);
+ RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_objs, n);
+
+ __rte_assume(cache->size <= RTE_MEMPOOL_CACHE_MAX_SIZE);
+ __rte_assume(remaining <= RTE_MEMPOOL_CACHE_MAX_SIZE);
+ cache_objs = &cache->objs[cache->size + remaining];
+ cache->len = cache->size;
+ for (index = 0; index < remaining; index++)
+ *obj_table++ = *--cache_objs;
+
+ return 0;
+
+driver_dequeue:
+
+ /* Get remaining objects directly from the backend. */
+ ret = rte_mempool_ops_dequeue_bulk(mp, obj_table, remaining);
+
+ if (unlikely(ret < 0)) {
+ cache->len = n - remaining;
+ /*
+ * No further action is required to roll the first part
+ * of the request back into the cache, as objects in
+ * the cache are intact.
+ */
+
+ RTE_MEMPOOL_STAT_ADD(mp, get_fail_bulk, 1);
+ RTE_MEMPOOL_STAT_ADD(mp, get_fail_objs, n);
+ } else {
+ RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_bulk, 1);
+ RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_objs, n);
+ __rte_assume(ret == 0);
+ }
+
+ return ret;
+}
+
/* Return the number of entries in the mempool */
RTE_EXPORT_SYMBOL(rte_mempool_avail_count)
unsigned int
@@ -1634,3 +1746,44 @@ RTE_INIT(mempool_init_telemetry)
rte_telemetry_register_cmd("/mempool/info", mempool_handle_info,
"Returns mempool info. Parameters: pool_name");
}
+
+void
+review_rte_mempool_do_generic_put(struct rte_mempool *mp, void * const *obj_table,
+ unsigned int n, struct rte_mempool_cache *cache)
+{ rte_mempool_do_generic_put(mp, obj_table, n, cache); }
+
+void
+review_rte_mempool_do_generic_put_const32(struct rte_mempool *mp, void * const *obj_table,
+ struct rte_mempool_cache *cache)
+{ rte_mempool_do_generic_put(mp, obj_table, 32, cache); }
+
+void
+review_rte_mempool_do_generic_put_const1(struct rte_mempool *mp, void * const *obj_table,
+ struct rte_mempool_cache *cache)
+{ rte_mempool_do_generic_put(mp, obj_table, 1, cache); }
+
+int
+review_rte_mempool_do_generic_get(struct rte_mempool *mp, void **obj_table,
+ unsigned int n, struct rte_mempool_cache *cache)
+{ return rte_mempool_do_generic_get(mp, obj_table, n, cache); }
+
+int
+review_rte_mempool_do_generic_get_const32(struct rte_mempool *mp, void **obj_table,
+ struct rte_mempool_cache *cache)
+{ return rte_mempool_do_generic_get(mp, obj_table, 32, cache); }
+
+int
+review_rte_mempool_do_generic_get_const1(struct rte_mempool *mp, void **obj_table,
+ struct rte_mempool_cache *cache)
+{ return rte_mempool_do_generic_get(mp, obj_table, 1, cache); }
+
+int
+review_rte_mempool_do_generic_get_const1ret(struct rte_mempool *mp, void **obj_table,
+ struct rte_mempool_cache *cache)
+{
+ int ret = rte_mempool_do_generic_get(mp, obj_table, 1, cache);
+ if (ret == 0)
+ return 0x1234;
+ else
+ exit(ret);
+}
diff --git a/lib/mempool/rte_mempool.h b/lib/mempool/rte_mempool.h
index 7989d7a475..9658bd7b4a 100644
--- a/lib/mempool/rte_mempool.h
+++ b/lib/mempool/rte_mempool.h
@@ -1370,6 +1370,24 @@ rte_mempool_cache_flush(struct rte_mempool_cache *cache,
cache->len = 0;
}
+/**
+ * @internal
+ * Put several objects back in the mempool, more than the cache has room for; used internally.
+ * @param mp
+ * A pointer to the mempool structure.
+ * @param obj_table
+ * A pointer to a table of void * pointers (objects).
+ * @param n
+ * The number of objects to store back in the mempool, must be strictly
+ * positive.
+ * @param cache
+ * A pointer to a mempool cache structure.
+ */
+__rte_internal
+void
+_rte_mempool_do_generic_put_more(struct rte_mempool *mp, void * const *obj_table,
+ unsigned int n, struct rte_mempool_cache *cache);
+
/**
* @internal Put several objects back in the mempool; used internally.
* @param mp
@@ -1388,9 +1406,16 @@ rte_mempool_do_generic_put(struct rte_mempool *mp, void * const *obj_table,
{
void **cache_objs;
- /* No cache provided? */
- if (unlikely(cache == NULL))
- goto driver_enqueue;
+ if (unlikely(cache == NULL)) {
+ /* No cache. Push objects directly to the backend. */
+ /* Increment stats now, adding in mempool always succeeds. */
+ RTE_MEMPOOL_STAT_ADD(mp, put_bulk, 1);
+ RTE_MEMPOOL_STAT_ADD(mp, put_objs, n);
+
+ rte_mempool_ops_enqueue_bulk(mp, obj_table, n);
+
+ return;
+ }
/* Increment stats now, adding in mempool always succeeds. */
RTE_MEMPOOL_CACHE_STAT_ADD(cache, put_bulk, 1);
@@ -1403,35 +1428,43 @@ rte_mempool_do_generic_put(struct rte_mempool *mp, void * const *obj_table,
/* Sufficient room in the cache for the objects. */
cache_objs = &cache->objs[cache->len];
cache->len += n;
- } else if (n <= cache->flushthresh) {
+
+cache_enqueue:
+#if 0 /* Simple alternative to rte_memcpy(). */
/*
- * The cache is big enough for the objects, but - as detected by
- * the comparison above - has insufficient room for them.
- * Flush the cache to make room for the objects.
+ * Add the objects to the cache.
+ * If the request size is known at build time,
+ * the compiler unrolls the fixed length copy loop.
*/
- cache_objs = &cache->objs[0];
- rte_mempool_ops_enqueue_bulk(mp, cache_objs, cache->len);
- cache->len = n;
- } else {
- /* The request itself is too big for the cache. */
- goto driver_enqueue_stats_incremented;
- }
-
- /* Add the objects to the cache. */
- rte_memcpy(cache_objs, obj_table, sizeof(void *) * n);
+ for (uint32_t index = 0; index < n; index++)
+ *cache_objs++ = *obj_table++;
+#else
+ /* Add the objects to the cache. */
+ rte_memcpy(cache_objs, obj_table, sizeof(void *) * n);
+#endif
- return;
+ return;
+ }
-driver_enqueue:
+ if (__rte_constant(n) && likely(n <= cache->flushthresh)) {
+ uint32_t len;
- /* increment stat now, adding in mempool always success */
- RTE_MEMPOOL_STAT_ADD(mp, put_bulk, 1);
- RTE_MEMPOOL_STAT_ADD(mp, put_objs, n);
+ /*
+ * The cache is big enough for the objects, but - as detected
+ * above - has insufficient room for them.
+ * Flush the cache to make room for the objects.
+ */
+ len = cache->len;
+ cache_objs = &cache->objs[0];
+ cache->len = n;
+ rte_mempool_ops_enqueue_bulk(mp, cache_objs, len);
-driver_enqueue_stats_incremented:
+ /* Add the objects to the cache. */
+ goto cache_enqueue;
+ }
- /* push objects to the backend */
- rte_mempool_ops_enqueue_bulk(mp, obj_table, n);
+ /* Insufficient room in the cache for the objects. */
+ _rte_mempool_do_generic_put_more(mp, obj_table, n, cache);
}
@@ -1498,6 +1531,26 @@ rte_mempool_put(struct rte_mempool *mp, void *obj)
rte_mempool_put_bulk(mp, &obj, 1);
}
+/**
+ * @internal
+ * Get several objects from the mempool, more than held in the cache; used internally.
+ * @param mp
+ * A pointer to the mempool structure.
+ * @param obj_table
+ * A pointer to a table of void * pointers (objects).
+ * @param n
+ * The number of objects to get, must be strictly positive.
+ * @param cache
+ * A pointer to a mempool cache structure.
+ * @return
+ * - 0: Success.
+ * - <0: Error; code of driver dequeue function.
+ */
+__rte_internal
+int
+_rte_mempool_do_generic_get_more(struct rte_mempool *mp, void **obj_table,
+ unsigned int n, struct rte_mempool_cache *cache);
+
/**
* @internal Get several objects from the mempool; used internally.
* @param mp
@@ -1516,26 +1569,36 @@ static __rte_always_inline int
rte_mempool_do_generic_get(struct rte_mempool *mp, void **obj_table,
unsigned int n, struct rte_mempool_cache *cache)
{
- int ret;
- unsigned int remaining;
- uint32_t index, len;
- void **cache_objs;
-
- /* No cache provided? */
if (unlikely(cache == NULL)) {
- remaining = n;
- goto driver_dequeue;
- }
+ int ret;
- /* The cache is a stack, so copy will be in reverse order. */
- cache_objs = &cache->objs[cache->len];
+ /* No cache. Get objects directly from the backend. */
+ ret = rte_mempool_ops_dequeue_bulk(mp, obj_table, n);
+
+ if (unlikely(ret < 0)) {
+ RTE_MEMPOOL_STAT_ADD(mp, get_fail_bulk, 1);
+ RTE_MEMPOOL_STAT_ADD(mp, get_fail_objs, n);
+ } else {
+ RTE_MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
+ RTE_MEMPOOL_STAT_ADD(mp, get_success_objs, n);
+ __rte_assume(ret == 0);
+ }
+
+ return ret;
+ }
__rte_assume(cache->len <= RTE_MEMPOOL_CACHE_MAX_SIZE * 2);
if (likely(n <= cache->len)) {
+ uint32_t index;
+ void **cache_objs;
+
/* The entire request can be satisfied from the cache. */
RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_bulk, 1);
RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_objs, n);
+ /* The cache is a stack, so copy will be in reverse order. */
+ cache_objs = &cache->objs[cache->len];
+
/*
* If the request size is known at build time,
* the compiler unrolls the fixed length copy loop.
@@ -1547,71 +1610,8 @@ rte_mempool_do_generic_get(struct rte_mempool *mp, void **obj_table,
return 0;
}
- /* Use the cache as much as we have to return hot objects first. */
- len = cache->len;
- remaining = n - len;
- cache->len = 0;
- for (index = 0; index < len; index++)
- *obj_table++ = *--cache_objs;
-
- /* Dequeue below would overflow mem allocated for cache? */
- if (unlikely(remaining > RTE_MEMPOOL_CACHE_MAX_SIZE))
- goto driver_dequeue;
-
- /* Fill the cache from the backend; fetch size + remaining objects. */
- ret = rte_mempool_ops_dequeue_bulk(mp, cache->objs,
- cache->size + remaining);
- if (unlikely(ret < 0)) {
- /*
- * We are buffer constrained, and not able to fetch all that.
- * Do not fill the cache, just satisfy the remaining part of
- * the request directly from the backend.
- */
- goto driver_dequeue;
- }
-
- /* Satisfy the remaining part of the request from the filled cache. */
- RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_bulk, 1);
- RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_objs, n);
-
- __rte_assume(cache->size <= RTE_MEMPOOL_CACHE_MAX_SIZE);
- __rte_assume(remaining <= RTE_MEMPOOL_CACHE_MAX_SIZE);
- cache_objs = &cache->objs[cache->size + remaining];
- cache->len = cache->size;
- for (index = 0; index < remaining; index++)
- *obj_table++ = *--cache_objs;
-
- return 0;
-
-driver_dequeue:
-
- /* Get remaining objects directly from the backend. */
- ret = rte_mempool_ops_dequeue_bulk(mp, obj_table, remaining);
-
- if (unlikely(ret < 0)) {
- if (likely(cache != NULL)) {
- cache->len = n - remaining;
- /*
- * No further action is required to roll the first part
- * of the request back into the cache, as objects in
- * the cache are intact.
- */
- }
-
- RTE_MEMPOOL_STAT_ADD(mp, get_fail_bulk, 1);
- RTE_MEMPOOL_STAT_ADD(mp, get_fail_objs, n);
- } else {
- if (likely(cache != NULL)) {
- RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_bulk, 1);
- RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_objs, n);
- } else {
- RTE_MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
- RTE_MEMPOOL_STAT_ADD(mp, get_success_objs, n);
- }
- __rte_assume(ret == 0);
- }
-
- return ret;
+ /* The entire request cannot be satisfied from the cache. */
+ return _rte_mempool_do_generic_get_more(mp, obj_table, n, cache);
}
/**
--
2.43.0