Add optional benchmarks for memcpy() and memmove() functions.
Each benchmark is run with several buffer sizes, using both aligned and
unaligned buffers, to spot unaligned-access penalties on platforms where
they have a noticeable performance impact.

Sample output on a RISC-V machine:
        # modprobe memcpy_kunit
        KTAP version 1
        1..1
            KTAP version 1
            # Subtest: memcpy
            # module: memcpy_kunit
            1..8
            [...]
            # memcpy_bench_test: memcpy: aligned copy of len 2: 6 MB/s
            # memcpy_bench_test: memcpy: unaligned copy of len 2: 6 MB/s
            # memcpy_bench_test: memcpy: aligned copy of len 64: 179 MB/s
            # memcpy_bench_test: memcpy: unaligned copy of len 64: 170 MB/s
            # memcpy_bench_test: memcpy: aligned copy of len 256: 697 MB/s
            # memcpy_bench_test: memcpy: unaligned copy of len 256: 421 MB/s
            # memcpy_bench_test: memcpy: aligned copy of len 4194304: 935 MB/s
            # memcpy_bench_test: memcpy: unaligned copy of len 4194304: 333 MB/s
            # memcpy_bench_test.speed: slow
            ok 7 memcpy_bench_test
            # memmove_bench_test: memmove: aligned move of len 64: 162 MB/s
            # memmove_bench_test: memmove: unaligned move of len 64: 162 MB/s
            # memmove_bench_test: memmove: aligned move of len 256: 647 MB/s
            # memmove_bench_test: memmove: unaligned move of len 256: 647 MB/s
            # memmove_bench_test: memmove: aligned move of len 4194304: 1540 MB/s
            # memmove_bench_test: memmove: unaligned move of len 4194304: 1557 MB/s
            # memmove_bench_test.speed: slow
            ok 8 memmove_bench_test
        # memcpy: pass:8 fail:0 skip:0 total:8
        # Totals: pass:8 fail:0 skip:0 total:8
        ok 1 memcpy

Signed-off-by: Matteo Croce <[email protected]>
---
v5: rebase to fix a conflict with recent changes

 lib/Kconfig.debug        |   9 ++++
 lib/tests/memcpy_kunit.c | 107 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 116 insertions(+)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 318df4c75454..a61774690362 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2986,6 +2986,15 @@ config MIN_HEAP_KUNIT_TEST
 
          If unsure, say N
 
+config MEMCPY_KUNIT_BENCHMARK
+       bool "Benchmark memcpy() and memmove()"
+       depends on MEMCPY_KUNIT_TEST
+       help
+         Adds benchmark cases for the memcpy() and memmove() functions to
+         the memcpy KUnit test suite, using both aligned and unaligned
+         buffers. The measured throughput is printed in the KUnit log.
+
+         If unsure, say N.
+
 config IS_SIGNED_TYPE_KUNIT_TEST
        tristate "Test is_signed_type() macro" if !KUNIT_ALL_TESTS
        depends on KUNIT
diff --git a/lib/tests/memcpy_kunit.c b/lib/tests/memcpy_kunit.c
index d36933554e46..e7588b868125 100644
--- a/lib/tests/memcpy_kunit.c
+++ b/lib/tests/memcpy_kunit.c
@@ -493,6 +493,109 @@ static void memmove_overlap_test(struct kunit *test)
        }
 }
 
+#ifdef CONFIG_MEMCPY_KUNIT_BENCHMARK
+
+#define COPIES_NUM     100
+
+/*
+ * Time COPIES_NUM memcpy() calls between two freshly allocated @size byte
+ * buffers and log the resulting throughput through kunit_info().
+ *
+ * When @unalign is true the destination is advanced by one byte and one byte
+ * less is copied, so the copy still fits inside both buffers. The reported
+ * throughput is based on the number of bytes actually copied.
+ *
+ * An allocation failure fails the test through KUNIT_ASSERT_EQ(), after any
+ * buffer that was obtained has been released.
+ */
+static void memcpy_bench_size_align(struct kunit *test, int size, bool unalign)
+{
+       const int len = size - unalign;
+       /* Widen before multiplying so the byte count is not computed in int */
+       const u64 bytes = (u64)COPIES_NUM * len;
+       u64 start, end, total_ns = 0;
+       char *dst, *src;
+       int ret = 0;
+
+       dst = kzalloc(size, GFP_KERNEL);
+       if (!dst) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       src = kzalloc(size, GFP_KERNEL);
+       if (!src) {
+               ret = -ENOMEM;
+               goto out_free;
+       }
+
+       for (int i = 0; i < COPIES_NUM; i++) {
+               /* Interrupts are off only around the timed copy itself */
+               local_irq_disable();
+               start = ktime_get_ns();
+               memcpy(dst + unalign, src, len);
+               end = ktime_get_ns();
+               local_irq_enable();
+               total_ns += end - start;
+       }
+
+       /* Avoid division by zero */
+       if (!total_ns)
+               total_ns = 1;
+
+       /*
+        * NOTE(review): this is a u64 by u64 division; 32-bit builds may need
+        * div64_u64() here - confirm before enabling the benchmark there.
+        */
+       kunit_info(test, "memcpy: %saligned copy of len %d: %llu MB/s\n",
+                  unalign ? "un" : "", size,
+                  (bytes * 1000000000ULL / total_ns) / (1024 * 1024));
+
+       kfree(src);
+
+out_free:
+       kfree(dst);
+
+out:
+       KUNIT_ASSERT_EQ(test, ret, 0);
+}
+
+/*
+ * Benchmark memcpy() for one buffer size: first the aligned variant, then
+ * the unaligned one.
+ */
+static void memcpy_bench_size(struct kunit *test, int size)
+{
+       for (int unalign = 0; unalign <= 1; unalign++)
+               memcpy_bench_size_align(test, size, unalign);
+}
+
+/*
+ * KUnit case: run the memcpy() benchmark over a spread of sizes, from a
+ * two-byte copy up to PAGE_SIZE << MAX_PAGE_ORDER bytes.
+ */
+static void memcpy_bench_test(struct kunit *test)
+{
+       const int sizes[] = { 2, 64, 256, PAGE_SIZE << MAX_PAGE_ORDER };
+       const int count = sizeof(sizes) / sizeof(sizes[0]);
+
+       for (int idx = 0; idx < count; idx++)
+               memcpy_bench_size(test, sizes[idx]);
+}
+
+/*
+ * Time COPIES_NUM overlapping memmove() calls inside a single @size byte
+ * buffer and log the resulting throughput through kunit_info().
+ *
+ * The data is moved towards higher addresses by a tenth of the buffer; when
+ * @unalign is true the distance grows by one byte. The reported throughput
+ * is based on the number of bytes actually moved.
+ *
+ * An allocation failure aborts the test through KUNIT_ASSERT_NOT_NULL().
+ */
+static void memmove_bench_size_align(struct kunit *test, int size, bool unalign)
+{
+       const int shift = size / 10 + unalign;
+       const int len = size - shift;
+       /* Widen before multiplying so the byte count is not computed in int */
+       const u64 bytes = (u64)COPIES_NUM * len;
+       u64 start, end, total_ns = 0;
+       char *buf;
+
+       buf = kzalloc(size, GFP_KERNEL);
+       KUNIT_ASSERT_NOT_NULL(test, buf);
+
+       for (int i = 0; i < COPIES_NUM; i++) {
+               /* Interrupts are off only around the timed move itself */
+               local_irq_disable();
+               start = ktime_get_ns();
+               memmove(buf + shift, buf, len);
+               end = ktime_get_ns();
+               local_irq_enable();
+               total_ns += end - start;
+       }
+
+       /* Avoid division by zero */
+       if (!total_ns)
+               total_ns = 1;
+
+       /*
+        * NOTE(review): this is a u64 by u64 division; 32-bit builds may need
+        * div64_u64() here - confirm before enabling the benchmark there.
+        */
+       kunit_info(test, "memmove: %saligned move of len %d: %llu MB/s\n",
+                  unalign ? "un" : "", size,
+                  (bytes * 1000000000ULL / total_ns) / (1024 * 1024));
+       kfree(buf);
+}
+
+/*
+ * Benchmark memmove() for one buffer size: first the aligned variant, then
+ * the unaligned one.
+ */
+static void memmove_bench_size(struct kunit *test, int size)
+{
+       for (int unalign = 0; unalign <= 1; unalign++)
+               memmove_bench_size_align(test, size, unalign);
+}
+
+/*
+ * KUnit case: run the memmove() benchmark over a spread of sizes, from
+ * 64 bytes up to PAGE_SIZE << MAX_PAGE_ORDER bytes.
+ */
+static void memmove_bench_test(struct kunit *test)
+{
+       const int sizes[] = { 64, 256, PAGE_SIZE << MAX_PAGE_ORDER };
+       const int count = sizeof(sizes) / sizeof(sizes[0]);
+
+       for (int idx = 0; idx < count; idx++)
+               memmove_bench_size(test, sizes[idx]);
+}
+#endif
+
 static struct kunit_case memcpy_test_cases[] = {
        KUNIT_CASE(memset_test),
        KUNIT_CASE(memcpy_test),
@@ -500,6 +603,10 @@ static struct kunit_case memcpy_test_cases[] = {
        KUNIT_CASE_SLOW(memmove_test),
        KUNIT_CASE_SLOW(memmove_large_test),
        KUNIT_CASE_SLOW(memmove_overlap_test),
+#ifdef CONFIG_MEMCPY_KUNIT_BENCHMARK
+       KUNIT_CASE_SLOW(memcpy_bench_test),
+       KUNIT_CASE_SLOW(memmove_bench_test),
+#endif
        {}
 };
 
-- 
2.53.0


Reply via email to