PING for review.

Here's some elaboration for reviewers...

Clearly, when the request can be served from the cache (n <= cache->len), the
patch is correct, regardless of whether n is constant or variable:

        __rte_assume(cache->len <= RTE_MEMPOOL_CACHE_MAX_SIZE * 2);
        if (likely(n <= cache->len)) {
                /*
                 * The entire request can be satisfied from the cache.
                 * If the request size is known at build time,
                 * the compiler unrolls the fixed length copy loop.
                 */
                cache->len -= n;
                for (index = 0; index < n; index++)
                        *obj_table++ = *--cache_objs;

                RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_bulk, 1);
                RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_objs, n);

                return 0;
        }


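Now, let's see what the original (pre-patch) code does when the request cannot
be served from the cache, i.e. when n > cache->len. As the annotations below
show, it reduces to exactly the simplified code in the patch: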

        __rte_assume(cache->len <= RTE_MEMPOOL_CACHE_MAX_SIZE * 2);
        if (__rte_constant(n) && n <= cache->len) {
// FALSE, because n > cache->len
// Regardless of whether n is constant or variable
//              /*
//               * The request size is known at build time, and
//               * the entire request can be satisfied from the cache,
//               * so let the compiler unroll the fixed length copy loop.
//               */
//              cache->len -= n;
//              for (index = 0; index < n; index++)
//                      *obj_table++ = *--cache_objs;
//
//              RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_bulk, 1);
//              RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_objs, n);
//
//              return 0;
//      }

        /*
         * Use the cache as much as we have to return hot objects first.
         * If the request size 'n' is known at build time, the above comparison
         * ensures that n > cache->len here, so omit RTE_MIN().
         */
        len = __rte_constant(n) ? cache->len : RTE_MIN(n, cache->len);
// ALWAYS: len = cache->len
// When n is constant:
//      len = cache->len
// When n is variable:
//      len = RTE_MIN(n, cache->len)
//              = cache->len, because n > cache->len
        cache->len -= len;
// ALWAYS: cache->len = 0, because len == cache->len
        remaining = n - len;
        for (index = 0; index < len; index++)
                *obj_table++ = *--cache_objs;

        /*
         * If the request size 'n' is known at build time, the case
         * where the entire request can be satisfied from the cache
         * has already been handled above, so omit handling it here.
         */
        if (!__rte_constant(n) && likely(remaining == 0)) {
// FALSE, because remaining > 0
//              /* The entire request is satisfied from the cache. */
//
//              RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_bulk, 1);
//              RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_objs, n);
//
//              return 0;
//      }

        /* Dequeue below would overflow mem allocated for cache? */
        if (unlikely(remaining > RTE_MEMPOOL_CACHE_MAX_SIZE))
                goto driver_dequeue;

Venlig hilsen / Kind regards,
-Morten Brørup


> -----Original Message-----
> From: Morten Brørup [mailto:[email protected]]
> Sent: Tuesday, 20 January 2026 11.17
> To: Andrew Rybchenko; [email protected]
> Cc: Morten Brørup
> Subject: [PATCH v2] mempool: simplify get objects
> 
> Removed explicit test for build time constant request size,
> and added comment that the compiler loop unrolls when request size is
> build time constant, to improve source code readability.
> 
> Signed-off-by: Morten Brørup <[email protected]>
> ---
> v2:
> * Removed unrelated microoptimization from
> rte_mempool_do_generic_put(),
>   which was also described incorrectly.
> ---
>  lib/mempool/rte_mempool.h | 35 ++++++++---------------------------
>  1 file changed, 8 insertions(+), 27 deletions(-)
> 
> diff --git a/lib/mempool/rte_mempool.h b/lib/mempool/rte_mempool.h
> index aedc100964..4213784e14 100644
> --- a/lib/mempool/rte_mempool.h
> +++ b/lib/mempool/rte_mempool.h
> @@ -1531,11 +1531,11 @@ rte_mempool_do_generic_get(struct rte_mempool
> *mp, void **obj_table,
>       cache_objs = &cache->objs[cache->len];
> 
>       __rte_assume(cache->len <= RTE_MEMPOOL_CACHE_MAX_SIZE * 2);
> -     if (__rte_constant(n) && n <= cache->len) {
> +     if (likely(n <= cache->len)) {
>               /*
> -              * The request size is known at build time, and
> -              * the entire request can be satisfied from the cache,
> -              * so let the compiler unroll the fixed length copy loop.
> +              * The entire request can be satisfied from the cache.
> +              * If the request size is known at build time,
> +              * the compiler unrolls the fixed length copy loop.
>                */
>               cache->len -= n;
>               for (index = 0; index < n; index++)
> @@ -1547,31 +1547,13 @@ rte_mempool_do_generic_get(struct rte_mempool
> *mp, void **obj_table,
>               return 0;
>       }
> 
> -     /*
> -      * Use the cache as much as we have to return hot objects first.
> -      * If the request size 'n' is known at build time, the above
> comparison
> -      * ensures that n > cache->len here, so omit RTE_MIN().
> -      */
> -     len = __rte_constant(n) ? cache->len : RTE_MIN(n, cache->len);
> -     cache->len -= len;
> +     /* Use the cache as much as we have to return hot objects first.
> */
> +     len = cache->len;
>       remaining = n - len;
> +     cache->len = 0;
>       for (index = 0; index < len; index++)
>               *obj_table++ = *--cache_objs;
> 
> -     /*
> -      * If the request size 'n' is known at build time, the case
> -      * where the entire request can be satisfied from the cache
> -      * has already been handled above, so omit handling it here.
> -      */
> -     if (!__rte_constant(n) && likely(remaining == 0)) {
> -             /* The entire request is satisfied from the cache. */
> -
> -             RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_bulk, 1);
> -             RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_objs, n);
> -
> -             return 0;
> -     }
> -
>       /* Dequeue below would overflow mem allocated for cache? */
>       if (unlikely(remaining > RTE_MEMPOOL_CACHE_MAX_SIZE))
>               goto driver_dequeue;
> @@ -1592,11 +1574,10 @@ rte_mempool_do_generic_get(struct rte_mempool
> *mp, void **obj_table,
>       __rte_assume(cache->size <= RTE_MEMPOOL_CACHE_MAX_SIZE);
>       __rte_assume(remaining <= RTE_MEMPOOL_CACHE_MAX_SIZE);
>       cache_objs = &cache->objs[cache->size + remaining];
> +     cache->len = cache->size;
>       for (index = 0; index < remaining; index++)
>               *obj_table++ = *--cache_objs;
> 
> -     cache->len = cache->size;
> -
>       RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_bulk, 1);
>       RTE_MEMPOOL_CACHE_STAT_ADD(cache, get_success_objs, n);
> 
> --
> 2.43.0

Reply via email to