string_kunit: add performance benchmark for strlen()

Andy Shevchenko Fri, 23 Jan 2026 03:07:09 -0800

On Fri, Jan 23, 2026 at 04:58:37PM +0800, Feng Jiang wrote:
> Introduce a benchmarking framework to the string_kunit test suite to
> measure the execution efficiency of string functions.
> 
> The implementation is inspired by crc_benchmark(), measuring throughput
> (MB/s) and latency (ns/call) across a range of string lengths. It
> includes a warm-up phase, disables preemption during measurement, and
> uses a fixed seed for reproducible results.
> 
> This framework allows for comparing different implementations (e.g.,
> generic C vs. architecture-optimized assembly) within the KUnit
> environment.
> 
> Initially, provide a benchmark for strlen().


...

> +static void *alloc_max_bench_buffer(struct kunit *test,
> +             const size_t *lens, size_t count, size_t *buf_len)
> +{
> +     size_t i, max_len = 0;
> +     void *buf;

> +     for (i = 0; i < count; i++) {
> +             if (max_len < lens[i])
> +                     max_len = lens[i];
> +     }

        size_t max_len = 0;
        void *buf;

        for (size_t i = 0; i < count; i++)
                max_len = max(lens[i], max_len);

> +     /* Add space for NUL character */
> +     max_len += 1;
> +
> +     buf = kunit_kzalloc(test, max_len, GFP_KERNEL);
> +     if (!buf)
> +             return NULL;
> +
> +     if (buf_len)
> +             *buf_len = max_len;
> +
> +     return buf;
> +}

...

> +#define STRING_BENCH(iters, func, ...)                                       \
> +({                                                                   \
> +     /* Volatile function pointer prevents dead code elimination */  \
> +     typeof(func) (* volatile __func) = (func);                      \
> +     size_t __bn_iters = (iters);                                    \
> +     size_t __bn_warm_iters;                                         \

> +     size_t __bn_i;                                                  \

Define it inside the for-loops.

> +     u64 __bn_t;                                                     \
> +                                                                     \
> +     __bn_warm_iters = max(__bn_iters / 10, 50U);                    \
> +                                                                     \
> +     for (__bn_i = 0; __bn_i < __bn_warm_iters; __bn_i++)            \
> +             (void)__func(__VA_ARGS__);                              \
> +                                                                     \
> +     preempt_disable();                                              \
> +     __bn_t = ktime_get_ns();                                        \
> +     for (__bn_i = 0; __bn_i < __bn_iters; __bn_i++)                 \
> +             (void)__func(__VA_ARGS__);                              \
> +     __bn_t = ktime_get_ns() - __bn_t;                               \
> +     preempt_enable();                                               \
> +     __bn_t;                                                         \
> +})

...

> +#define STRING_BENCH_BUF(test, buf_name, buf_size, func, ...)                \
> +do {                                                                 \
> +     size_t buf_size, _bn_i, _bn_iters, _bn_size = 0;                \
> +     u64 _bn_t, _bn_mbps = 0, _bn_lat = 0;                           \
> +     char *buf_name, *_bn_buf;                                       \

> +     if (!IS_ENABLED(CONFIG_STRING_KUNIT_BENCH))                     \
> +             kunit_skip(test, "not enabled");                        \

Hmm... Since it's a macro anyway, I think the old style is okay:


#if IS_ENABLED(CONFIG_STRING_KUNIT_BENCH)
#define STRING_BENCH_BUF(test, buf_name, buf_size, func, ...)           \
        ...
#else
#define STRING_BENCH_BUF(test, buf_name, buf_size, func, ...)           \
        kunit_skip(test, "not enabled");
#endif

But check that it doesn't produce warnings in the `make W=1` case.

> +     _bn_buf = alloc_max_bench_buffer(test, bench_lens,              \
> +                     ARRAY_SIZE(bench_lens), &_bn_size);             \
> +     KUNIT_ASSERT_NOT_ERR_OR_NULL(test, _bn_buf);                    \
> +                                                                     \
> +     fill_random_string(_bn_buf, _bn_size);                          \
> +                                                                     \
> +     for (_bn_i = 0; _bn_i < ARRAY_SIZE(bench_lens); _bn_i++) {      \
> +             buf_size = bench_lens[_bn_i];                           \
> +             buf_name = _bn_buf + _bn_size - buf_size - 1;           \
> +             _bn_iters = STRING_BENCH_WORKLOAD / max(buf_size, 1U);  \
> +                                                                     \
> +             _bn_t = STRING_BENCH(_bn_iters, func, ##__VA_ARGS__);   \
> +                                                                     \
> +             if (_bn_t > 0) {                                        \
> +                     _bn_mbps = (u64)(buf_size) * _bn_iters * 1000;  \

"KILO"? Or "(MEGA/KILO)"? I'm puzzled by this 1000 multiplier.

> +                     _bn_mbps = div64_u64(_bn_mbps, _bn_t);          \
> +                     _bn_lat = div64_u64(_bn_t, _bn_iters);          \
> +             }                                                       \
> +             kunit_info(test, "len=%zu: %llu MB/s (%llu ns/call)\n", \
> +                             buf_size, _bn_mbps, _bn_lat);           \
> +     }                                                               \
> +} while (0)

-- 
With Best Regards,
Andy Shevchenko

Re: [PATCH v4 4/8] lib/string_kunit: add performance benchmark for strlen()

Reply via email to