On Mon, Apr 27, 2015 at 03:14:49PM -0700, Linus Torvalds wrote:
> On Mon, Apr 27, 2015 at 3:00 PM, Linus Torvalds
> <torva...@linux-foundation.org> wrote:
> >
> > IOW, all the people who say that it's about avoiding context switches
> > are probably just full of shit. It's not about context switches, it's
> > about bad user-level code.
> 
> Just to make sure, I did a system-wide profile (so that you can
> actually see the overhead of context switching better), and that
> didn't change the picture.
> 
> The scheduler overhead *might* be 1% or so.
> 
> So really. The people who talk about how kdbus improves performance
> are just full of sh*t. Yes, it improves things, but the improvement
> seems to be 100% "incidental", in that it avoids a few trips down the
> user-space problems.

I was interested in how plain UDS performs compared to the
dbus-client/dbus-server benchmark when doing a similar
transaction (RPC call from client1 to client2 via a server,
i.e. 4 send() and 4 recv() syscalls per RPC msg).
Since I had worked on socket code for some project anyway, I
decided to write a stupid little benchmark.

On my machine, dbus-client/dbus-server needs ~200us per call (1024 byte msg),
UDS "dbus call" needs ~23us.  Of course, someone who cares about performance
wouldn't use sync RPC via a message broker, so I added
single-client and async mode to the benchmark for comparison.
Async mode not only decreases scheduling overhead, it also
can use two CPU cores, so it's more than twice as fast.

  ./server dbus
      (you need to run two clients, the timing loop starts
      when the second client connects)
      ./client sync 4096 1000000
         22.757250 s, 43942 msg/s, 22.8 us/msg, 171.638 MB/s
      ./client async 4096 1000000
         8.197482 s, 121989 msg/s, 8.2 us/msg, 476.488 MB/s
  ./server single
      (only a single client talks to the server)
      ./client sync 4096 1000000
         10.980143 s, 91073 msg/s, 11.0 us/msg, 355.733 MB/s
      ./client async 4096 1000000
         3.041953 s, 328736 msg/s, 3.0 us/msg, 1284.044 MB/s

In all cases 1 msg means "send request + receive response".


Johannes
/* UDS server */

#include <errno.h>
#include <poll.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/timerfd.h>
#include <sys/un.h>


// Use an abstract address: the name starts with a NUL byte, which on Linux
// selects the abstract socket namespace instead of a filesystem path.
// NOTE(review): bind() below passes sizeof(addr) as the address length, so
// the effective name presumably includes the zero padding of sun_path; any
// peer has to connect with the same length -- confirm against unix(7).
#define SOCKET "\0udsbench.socket"

/*
 * Report a failure on stderr and terminate the process.
 *
 * msg is printed as-is; when errno is non-zero its number and textual
 * description are appended.  The process exits with EXIT_FAILURE unless
 * errno is ECONNRESET, in which case the function returns 0 so that the
 * caller can leave its event loop and still print its summary.
 */
static int die(const char *msg)
{
	/*
	 * Snapshot errno before doing any I/O: fprintf() is allowed to
	 * modify it, which would corrupt the ECONNRESET test below.
	 */
	const int err = errno;

	if (err)
		fprintf(stderr, "%s: error %d %s\n", msg, err, strerror(err));
	else
		fprintf(stderr, "%s\n", msg);
	if (err != ECONNRESET)
		exit(EXIT_FAILURE);
	return 0;
}

/*
 * UDS benchmark server.
 *
 * Usage: server {single|dbus}
 *
 * single: accept one client and echo every message back to it.
 * dbus:   accept two clients and forward every message received from one
 *         client to the other (any argument other than "single" selects
 *         this mode).
 *
 * Once all expected clients are connected, each one is sent a single
 * start byte: 0x00 tells the receiver it is client 1, 0x01 that it is
 * client 2.  The server then relays messages until a client reaches EOF
 * or an error occurs, and finally prints the number of relayed messages.
 */
int main(int argc, char *argv[])
{
	struct sockaddr_un addr = {
		.sun_family = AF_UNIX,
		.sun_path = SOCKET,
	};
	int sock, client1, client2 = -1, rc;
	/*
	 * Zero-initialised so that pfd[1].revents is well defined even in
	 * single mode, where poll() is only handed the first entry.
	 */
	struct pollfd pfd[2] = { { 0 } };
	nfds_t nfds;
	ssize_t len, wr;
	char buf[65536];
	unsigned long cnt = 0;
	bool single = false;

	if (argc != 2) {
		die("usage: server {single|dbus}");
		return EXIT_FAILURE;
	}
	if (!strcmp(argv[1], "single"))
		single = true;
	printf("running in %s mode\n", single ? "single" : "dbus");

	sock = socket(AF_UNIX, SOCK_SEQPACKET, 0);
	if (sock < 0)
		die("can't create socket");
	/*
	 * The address length stays sizeof(addr): the client connects with
	 * the same length, and both sides must agree on it.
	 */
	if (bind(sock, (struct sockaddr *) &addr, sizeof(addr)) < 0)
		die("can't bind address");
	if (listen(sock, 5) < 0)
		die("can't listen");

	printf("waiting for client 1\n");
	client1 = accept(sock, NULL, NULL);
	if (client1 < 0)
		die("accept");

	if (!single) {
		printf("waiting for client 2\n");
		client2 = accept(sock, NULL, NULL);
		if (client2 < 0)
			die("accept");

		/* start byte 0x01: "you are client 2" */
		if (write(client2, "\01", 1) != 1) {
			die("write to client 2");
			return EXIT_FAILURE;
		}
	}
	/* start byte 0x00: "you are client 1" */
	if (write(client1, "\0", 1) != 1) {
		die("write to client 1");
		return EXIT_FAILURE;
	}

	printf("enter event loop\n");
	pfd[0].fd = client1;
	pfd[1].fd = client2;
	pfd[0].events = pfd[1].events = POLLIN;
	nfds = single ? 1 : 2;
	for (;;) {
		rc = poll(pfd, nfds, -1);
		if (rc < 0) {
			/* an interrupted wait is not an error, just retry */
			if (errno == EINTR)
				continue;
			die("poll");
			break;
		}
		if (pfd[0].revents & POLLIN) {
			len = read(client1, buf, sizeof(buf));
			if (len < 0) {
				die("read from client 1");
				break;
			}
			if (len == 0) {
				printf("client 1 EOF\n");
				break;
			}
			/* single mode echoes, dbus mode forwards to the peer */
			wr = write(single ? client1 : client2, buf, len);
			if (wr != len) {
				die(single ? "write to client 1"
					   : "write to client 2");
				break;
			}
			cnt++;
		}
		if (pfd[1].revents & POLLIN) {
			len = read(client2, buf, sizeof(buf));
			if (len < 0) {
				die("read from client 2");
				break;
			}
			if (len == 0) {
				printf("client 2 EOF\n");
				break;
			}
			wr = write(client1, buf, len);
			if (wr != len) {
				die("write to client 1");
				break;
			}
			cnt++;
		}
	}
	printf("passed %lu messages\n", cnt);

	if (client2 >= 0)
		close(client2);
	close(client1);
	close(sock);
	return EXIT_SUCCESS;
}
/* UDS client */

#include <alloca.h>
#include <errno.h>
#include <poll.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/timerfd.h>
#include <sys/un.h>


// Use an abstract address: the name starts with a NUL byte, which on Linux
// selects the abstract socket namespace instead of a filesystem path.
// NOTE(review): connect() below passes sizeof(addr) as the address length,
// so the effective name presumably includes the zero padding of sun_path;
// the listening side has to bind with the same length -- confirm against
// unix(7).
#define SOCKET "\0udsbench.socket"

/*
 * Report a failure on stderr and terminate the process.
 *
 * msg is printed as-is; when errno is non-zero its number and textual
 * description are appended.  The process exits with EXIT_FAILURE unless
 * errno is EPIPE, in which case the function returns 0 so that the caller
 * can leave its loop and still print its timing summary.
 */
static int die(const char *msg)
{
	/*
	 * Snapshot errno before doing any I/O: fprintf() is allowed to
	 * modify it, which would corrupt the EPIPE test below.
	 */
	const int err = errno;

	if (err)
		fprintf(stderr, "%s: error %d %s\n", msg, err, strerror(err));
	else
		fprintf(stderr, "%s\n", msg);
	if (err != EPIPE)
		exit(EXIT_FAILURE);
	return 0;
}

/*
 * Parse a strictly positive integer in any base strtol() auto-detects.
 * Returns the value, or -1 if str is empty, has trailing garbage, is out
 * of range, or is not greater than zero.
 */
static long parse_positive(const char *str)
{
	char *end;
	long val;

	errno = 0;
	val = strtol(str, &end, 0);
	if (errno || end == str || *end != '\0' || val <= 0)
		return -1;
	return val;
}

/*
 * Do one blocking write (do_write) or read (!do_write) of len bytes on fd.
 * Returns true if exactly len bytes were transferred; otherwise reports
 * the failure through die() and returns false if die() came back.
 */
static bool xfer(int fd, char *buf, size_t len, bool do_write)
{
	ssize_t n = do_write ? write(fd, buf, len) : read(fd, buf, len);

	if (n == (ssize_t) len)
		return true;
	die(do_write ? "write" : "read");
	return false;
}

/*
 * UDS benchmark client.
 *
 * Usage: client {sync|async} msglen loops
 *
 * After connecting, the client waits for a one-byte start message from
 * the server: 0x00 makes it client 1 (the requester), anything else makes
 * it client 2 (the responder).
 *
 * sync:  client 1 does write+read per loop, client 2 does read+write.
 * async: client 1 keeps sending while the socket is writable and counts
 *        the replies as they come in; client 2 behaves as in sync mode.
 *
 * One "msg" in the statistics is one request plus its response.
 */
int main(int argc, char *argv[])
{
	struct sockaddr_un addr = {
		.sun_family = AF_UNIX,
		.sun_path = SOCKET,
	};
	int sock, client = 1;
	char *buf;
	struct timespec start, end;
	double duration;
	bool async = false;
	struct pollfd pfd;
	long msglen, loops, i, sent = 0;
	ssize_t rc;

	if (argc != 4) {
		die("usage: client {sync|async} msglen loops");
		return EXIT_FAILURE;
	}
	if (!strcmp(argv[1], "async"))
		async = true;
	msglen = parse_positive(argv[2]);
	loops = parse_positive(argv[3]);
	if (msglen < 0 || loops < 0) {
		/* a parse failure is a usage error, not an errno condition */
		errno = 0;
		die("usage: client {sync|async} msglen loops");
		return EXIT_FAILURE;
	}
	printf("running in %s mode, msg size %ld, %ld loops\n",
	       async ? "async" : "sync", msglen, loops);

	/*
	 * Heap buffer instead of alloca(): msglen comes from the command
	 * line and must not be able to overflow the stack.  calloc() also
	 * keeps uninitialised memory from being sent over the socket.
	 */
	buf = calloc(1, (size_t) msglen);
	if (!buf) {
		die("can't allocate buffer");
		return EXIT_FAILURE;
	}

	sock = socket(AF_UNIX, SOCK_SEQPACKET, 0);
	if (sock < 0)
		die("can't create socket");
	/*
	 * The address length stays sizeof(addr): the server binds with the
	 * same length, and both sides must agree on it.
	 */
	if (connect(sock, (struct sockaddr *) &addr, sizeof(addr)) < 0)
		die("can't connect");

	printf("waiting for other client\n");
	rc = read(sock, buf, 1);
	if (rc != 1) {
		die("read");
		exit(EXIT_FAILURE);
	}
	if (buf[0] != '\0')
		client = 2;
	printf("this is client %d\n", client);
	clock_gettime(CLOCK_MONOTONIC, &start);
	if (async && client == 1) {
		pfd.fd = sock;
		pfd.events = POLLIN | POLLOUT;
		/* i counts completed round trips, sent counts requests */
		for (i = 0; i < loops; ) {
			if (poll(&pfd, 1, -1) < 0) {
				if (errno == EINTR)
					continue;
				die("poll");
				break;
			}
			if (pfd.revents & POLLOUT) {
				rc = write(sock, buf, msglen);
				if (rc != msglen) {
					die("write");
					break;
				}
				/*
				 * Send exactly `loops` requests; afterwards
				 * only wait for the outstanding replies.
				 */
				if (++sent == loops)
					pfd.events = POLLIN;
			}
			/*
			 * Also read on hangup/error so a dead peer is
			 * reported by read() instead of spinning in poll().
			 */
			if (pfd.revents & (POLLIN | POLLHUP | POLLERR)) {
				rc = read(sock, buf, msglen);
				if (rc != msglen) {
					die("read");
					break;
				}
				i++;
			}
		}
	}
	else {
		/* client 1 sends first, client 2 answers */
		const bool write_first = (client == 1);

		for (i = 0; i < loops; i++) {
			if (!xfer(sock, buf, msglen, write_first))
				break;
			if (!xfer(sock, buf, msglen, !write_first))
				break;
		}
	}
	clock_gettime(CLOCK_MONOTONIC, &end);
	duration = end.tv_sec - start.tv_sec +
		(end.tv_nsec - start.tv_nsec) * 1e-9;
	/* byte count in floating point: no truncation, no long overflow */
	printf("%f s, %.0f msg/s, %.1f us/msg, %.3f MB/s\n", duration,
	       loops / duration, duration * 1e6 / loops,
	       (double) loops * msglen / (1 << 20) / duration);

	close(sock);
	free(buf);
	return EXIT_SUCCESS;
}

# Build the two benchmark programs.  No explicit compile rules are given,
# so make's built-in rules are relied upon with the CC and CFLAGS below
# (presumably building client.c and server.c -- the file names are not
# stated here).
CFLAGS := -O3 -Wall
CC := gcc

# "all" and "clean" do not name files that get created.
.PHONY: all clean

all: client server

# Recipe lines must start with a TAB character, not spaces.
clean:
	rm -f client server

Reply via email to