The intention of this benchmark is to measure event delivery
bandwidth: N threads produce events and 1 thread consumes them by
calling epoll_wait(2).

The benchmark takes measurements for 8, 16, 32, 64 and 128 producer
threads in a loop.

This one differs from epoll-wait-1pmc in that it produces events
from many threads and consumes them from a single one, thus mp1c
(many producers, 1 consumer).
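
For reference, a sample invocation through the epoll collection (the
column layout matches what the benchmark prints; throughput numbers
are machine-dependent and therefore elided here):

    # perf bench epoll wait-mp1c
    threads  events/ms  run-time ms
          8        ...          ...
        ...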

Signed-off-by: Roman Penyaev <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Davidlohr Bueso <[email protected]>
Cc: Jason Baron <[email protected]>
Cc: [email protected]
---
 tools/perf/bench/Build             |   1 +
 tools/perf/bench/bench.h           |   1 +
 tools/perf/bench/epoll-wait-mp1c.c | 175 +++++++++++++++++++++++++++++
 tools/perf/builtin-bench.c         |   1 +
 4 files changed, 178 insertions(+)
 create mode 100644 tools/perf/bench/epoll-wait-mp1c.c

diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index ef07fc40bc35..570df3f475b8 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -8,6 +8,7 @@ perf-y += futex-requeue.o
 perf-y += futex-lock-pi.o
 
 perf-y += epoll-wait-1pmc.o
+perf-y += epoll-wait-mp1c.o
 perf-y += epoll-ctl.o
 
 perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index fb9782624644..2ee7e7256e23 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -39,6 +39,7 @@ int bench_futex_requeue(int argc, const char **argv);
 int bench_futex_lock_pi(int argc, const char **argv);
 
 int bench_epoll_wait_1pmc(int argc, const char **argv);
+int bench_epoll_wait_mp1c(int argc, const char **argv);
 int bench_epoll_ctl(int argc, const char **argv);
 
 #define BENCH_FORMAT_DEFAULT_STR       "default"
diff --git a/tools/perf/bench/epoll-wait-mp1c.c b/tools/perf/bench/epoll-wait-mp1c.c
new file mode 100644
index 000000000000..44b06ae86e5b
--- /dev/null
+++ b/tools/perf/bench/epoll-wait-mp1c.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifdef HAVE_EVENTFD
+/*
+ * Copyright (C) 2019 Roman Penyaev
+ *
+ * This program benchmarks the bandwidth of events delivered from many
+ * threads (many producers) to a single consumer, which monitors for
+ * events by calling epoll_wait(2).
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <assert.h>
+#include <sys/epoll.h>
+#include <sys/eventfd.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <errno.h>
+#include <err.h>
+#include <linux/kernel.h> /* For ARRAY_SIZE only */
+#include "bench.h"
+#include "cpumap.h"
+
+#define ITERS     1000000ull
+
+struct thread_ctx {
+       pthread_t thread;
+       int efd;
+};
+
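+/*
+ * Plain volatile flags serve as a crude ready/start handshake: each
+ * producer bumps 'thr_ready' and then spins until the main thread
+ * flips 'start'.  That is sufficient for a benchmark, but it is not
+ * a portable synchronization pattern.
+ */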
+static volatile unsigned int thr_ready;
+static volatile unsigned int start;
+
+static inline unsigned long long nsecs(void)
+{
+       struct timespec ts = {0, 0};
+
+       clock_gettime(CLOCK_MONOTONIC, &ts);
+       return ((unsigned long long)ts.tv_sec * 1000000000ull) + ts.tv_nsec;
+}
+
+static void *thread_work(void *arg)
+{
+       struct thread_ctx *ctx = arg;
+       uint64_t ucnt = 1;
+       unsigned int i;
+       int rc;
+
+       __atomic_add_fetch(&thr_ready, 1, __ATOMIC_RELAXED);
+
+       while (!start)
+               ;
+
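+       /* Produce ITERS events by bumping the eventfd counter */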
+       for (i = 0; i < ITERS; i++) {
+               rc = write(ctx->efd, &ucnt, sizeof(ucnt));
+               assert(rc == sizeof(ucnt));
+       }
+
+       return NULL;
+}
+
+static int do_bench(struct cpu_map *cpu, unsigned int nthreads)
+{
+       struct epoll_event ev, events[nthreads];
+       struct thread_ctx threads[nthreads];
+       pthread_attr_t thrattr;
+       struct thread_ctx *ctx;
+       int rc, epfd, nfds;
+       cpu_set_t cpuset;
+       unsigned int i;
+
+       unsigned long long epoll_nsecs;
+       unsigned long long ucnt, ucnt_sum = 0;
+
+       epfd = epoll_create1(0);
+       if (epfd < 0)
+               err(EXIT_FAILURE, "epoll_create1");
+
+       for (i = 0; i < nthreads; i++) {
+               ctx = &threads[i];
+
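+               /* One nonblocking eventfd per producer, all watched by epoll */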
+               ctx->efd = eventfd(0, EFD_NONBLOCK);
+               if (ctx->efd < 0)
+                       err(EXIT_FAILURE, "eventfd");
+
+               ev.events = EPOLLIN;
+               ev.data.ptr = ctx;
+               rc = epoll_ctl(epfd, EPOLL_CTL_ADD, ctx->efd, &ev);
+               if (rc)
+                       err(EXIT_FAILURE, "epoll_ctl");
+
+               CPU_ZERO(&cpuset);
+               CPU_SET(cpu->map[i % cpu->nr], &cpuset);
+
+               pthread_attr_init(&thrattr);
+               rc = pthread_attr_setaffinity_np(&thrattr, sizeof(cpu_set_t),
+                                                &cpuset);
+               if (rc) {
+                       errno = rc;
+                       err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+               }
+
+               rc = pthread_create(&ctx->thread, &thrattr, thread_work, ctx);
+               if (rc) {
+                       errno = rc;
+                       err(EXIT_FAILURE, "pthread_create");
+               }
+       }
+
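+       /* Wait for all producers to check in */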
+       while (thr_ready != nthreads)
+               ;
+
+       /* Signal start for all threads */
+       start = 1;
+
+       epoll_nsecs = nsecs();
+       while (1) {
+               nfds = epoll_wait(epfd, events, nthreads, -1);
+               if (nfds < 0)
+                       err(EXIT_FAILURE, "epoll_wait");
+
+
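+               /*
+                * A read(2) from an eventfd returns the accumulated counter,
+                * so a single read may account for many produced events.
+                */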
+               for (i = 0; i < (unsigned int)nfds; ++i) {
+                       ctx = events[i].data.ptr;
+                       rc = read(ctx->efd, &ucnt, sizeof(ucnt));
+                       assert(rc == sizeof(ucnt));
+                       ucnt_sum += ucnt;
+                       if (ucnt_sum == nthreads * ITERS)
+                               goto end;
+               }
+       }
+end:
+       epoll_nsecs = nsecs() - epoll_nsecs;
+
+       for (i = 0; i < nthreads; i++) {
+               ctx = &threads[i];
+               pthread_join(ctx->thread, NULL);
+               close(ctx->efd);
+       }
+       close(epfd);
+
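+       /* Report delivered events per millisecond of epoll_wait() time */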
+       printf("%7u   %8llu     %8llu\n",
+              nthreads,
+              ITERS * nthreads / (epoll_nsecs / 1000 / 1000),
+              epoll_nsecs / 1000 / 1000);
+
+       return 0;
+}
+
+int bench_epoll_wait_mp1c(int argc, const char **argv)
+{
+       unsigned int i, nthreads_arr[] = {8, 16, 32, 64, 128};
+       struct cpu_map *cpu;
+
+       (void)argc; (void)argv;
+
+       cpu = cpu_map__new(NULL);
+       if (!cpu) {
+               errno = ENOMEM;
+               err(EXIT_FAILURE, "cpu_map__new");
+       }
+
+       printf("threads  events/ms  run-time ms\n");
+       for (i = 0; i < ARRAY_SIZE(nthreads_arr); i++)
+               do_bench(cpu, nthreads_arr[i]);
+
+       cpu_map__put(cpu);
+
+       return 0;
+}
+
+#endif // HAVE_EVENTFD
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index 6926b5a5eebf..4426b7afadf3 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -71,6 +71,7 @@ static struct bench futex_benchmarks[] = {
 #ifdef HAVE_EVENTFD
 static struct bench epoll_benchmarks[] = {
        { "wait-1pmc",  "Benchmark epoll concurrent epoll_waits",       
bench_epoll_wait_1pmc   },
+       { "wait-mp1c",  "Benchmark epoll events delivery bandwidth",    
bench_epoll_wait_mp1c   },
        { "ctl",        "Benchmark epoll concurrent epoll_ctls",        
bench_epoll_ctl         },
        { "all",        "Run all futex benchmarks",                     NULL    
                },
        { NULL,         NULL,                                           NULL    
                }
-- 
2.19.1
