On Fri, Jun 21, 2013 at 06:01:08PM +0200, Holger Hans Peter Freyther wrote:
> Do you have an idea on how this could be done? Hi, first of all, the mmap cache is not why journald is slow, but that is for another mail/benchmark. I was just curious whether the mmap cache is a premature optimization or whether it brings a speedup. So what I did was to patch journal-file.c and mmap-cache.c to record the calls made to posix_fallocate, fstat and mmap_cache_get and write their parameters to a file. I created a small tool to replay these operations. This way I can play with mmap-cache.c and see if my changes make a difference. I am attaching my hacked-together patch as a reference. With this patch, journald will exit at the point where it would normally rotate the log file. System: This was tested on a TI Davinci DM644x. The ARM core runs at 405 MHz and the system has 256 MiB of DRAM. It is running Linux 3.2.40; the userspace is post Poky 9.0.0 (glibc 2.17, gcc 4.7, ...). Workload/test: I was using the following to generate log messages as my example workload. This resulted in a 19 MB file with commands. while true; do (for i in `seq 1 100`; do echo "Log message... $RANDOM"; done) | logger; done Baseline replay with just iterating over the commands: root@sysmobts-v2:~# time ./replay-mmap-cache-no-work DONE real 0m0.275s user 0m0.080s sys 0m0.180s Replay with WINDOWS_MIN 64 (only the best, i.e. lowest, time shown): root@sysmobts-v2:~# time ./replay-mmap-cache-min-64 DONE real 0m3.427s user 0m1.200s sys 0m2.000s Replay with WINDOWS_MIN 0 (only the slowest time shown): root@sysmobts-v2:~# time ./replay-mmap-cache-min-0 DONE real 0m2.212s user 0m1.010s sys 0m1.040s So unless there is an issue with my recording/replay, I think that, besides my opinion that mapping a < 4 MB file 65 times is ugly, it also appears to be slower for the above workload in journald. Kind regards, Holger
>From ba1088eb4127206ae3259a7aabd3fc3f4a1943bb Mon Sep 17 00:00:00 2001 From: Holger Hans Peter Freyther <hol...@moiji-mobile.com> Date: Wed, 26 Jun 2013 21:12:57 +0200 Subject: [PATCH] hacks.. for creating a mmap test case.. --- Makefile.am | 8 +++ src/journal/journal-def.h | 27 ++++++++++ src/journal/journal-file.c | 28 ++++++++++ src/journal/journald-server.c | 16 ++++++ src/journal/mmap-cache.c | 40 +++++++++++++++ src/journal/replay-mmap-cache.c | 108 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 227 insertions(+) create mode 100644 src/journal/replay-mmap-cache.c diff --git a/Makefile.am b/Makefile.am index 3a196a6..33118a6 100644 --- a/Makefile.am +++ b/Makefile.am @@ -2697,6 +2697,13 @@ test_mmap_cache_LDADD = \ libsystemd-shared.la \ libsystemd-journal-internal.la +replay_mmap_cache_SOURCES = \ + src/journal/replay-mmap-cache.c + +replay_mmap_cache_LDADD = \ + libsystemd-shared.la \ + libsystemd-journal-internal.la + test_catalog_SOURCES = \ src/journal/test-catalog.c @@ -2866,6 +2873,7 @@ tests += \ test-journal-stream \ test-journal-verify \ test-mmap-cache \ + replay-mmap-cache \ test-catalog pkginclude_HEADERS += \ diff --git a/src/journal/journal-def.h b/src/journal/journal-def.h index 7e407a4..460741e 100644 --- a/src/journal/journal-def.h +++ b/src/journal/journal-def.h @@ -214,3 +214,30 @@ struct FSSHeader { le16_t reserved[3]; le64_t fsprg_state_size; } _packed_; + + +struct mmap_cache_data { + uint8_t type; +#define PERF_CMD_POSIX_ALLOCATE 1 +#define PERF_CMD_FSTAT 2 +#define PERF_CMD_FSTAT_CHECK 3 +#define PERF_CMD_MMAP_CACHE 4 + + union { + struct cmd_fallocate { + uint64_t old_size; + uint64_t increment; + } allocate; + struct cmd_fstat { + uint64_t offset; + uint64_t size; + } stat; + struct cmd_mmap { + int prot; + unsigned context; + bool keep_always; + uint64_t offset; + size_t size; + } mmap; + } u; +} _packed_; diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 38499a6..f619de9 100644 --- 
a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -359,10 +359,22 @@ static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) /* Note that the glibc fallocate() fallback is very inefficient, hence we try to minimize the allocation area as we can. */ + struct mmap_cache_data data = { + .type = PERF_CMD_POSIX_ALLOCATE, + .u.allocate = { + .old_size = old_size, + .increment = new_size - old_size, + }, + }; + write_cache_data(&data); r = posix_fallocate(f->fd, old_size, new_size - old_size); if (r != 0) return -r; + struct mmap_cache_data data_stat = { + .type = PERF_CMD_FSTAT, + }; + write_cache_data(&data_stat); if (fstat(f->fd, &f->last_stat) < 0) return -errno; @@ -382,6 +394,14 @@ static int journal_file_move_to(JournalFile *f, int context, bool keep_always, u if (offset + size > (uint64_t) f->last_stat.st_size) { /* Hmm, out of range? Let's refresh the fstat() data * first, before we trust that check. */ + struct mmap_cache_data data = { + .type = PERF_CMD_FSTAT_CHECK, + .u.stat = { + .offset = offset, + .size = size, + }, + }; + write_cache_data(&data); if (fstat(f->fd, &f->last_stat) < 0 || offset + size > (uint64_t) f->last_stat.st_size) @@ -2415,6 +2435,10 @@ int journal_file_open( goto fail; } + struct mmap_cache_data data_stat = { + .type = PERF_CMD_FSTAT, + }; + write_cache_data(&data_stat); if (fstat(f->fd, &f->last_stat) < 0) { r = -errno; goto fail; @@ -2452,6 +2476,10 @@ int journal_file_open( if (r < 0) goto fail; + struct mmap_cache_data data_stat = { + .type = PERF_CMD_FSTAT, + }; + write_cache_data(&data_stat); if (fstat(f->fd, &f->last_stat) < 0) { r = -errno; goto fail; diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c index cc52b8a..e348c31 100644 --- a/src/journal/journald-server.c +++ b/src/journal/journald-server.c @@ -89,6 +89,20 @@ static const char* const split_mode_table[] = { DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode); DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, 
split_mode, SplitMode, "Failed to parse split mode setting"); +static const char *cached_cg_root(void) { + static char *cached = NULL; + + int r; + + if (!cached) { + r = cg_get_root_path(&cached); + if (r < 0) + cached = NULL; + } + + return cached; +} + static uint64_t available_space(Server *s) { char ids[33]; _cleanup_free_ char *p = NULL; @@ -340,6 +354,8 @@ void server_rotate(Server *s) { server_fix_perms(s, f, PTR_TO_UINT32(k)); } } + + exit(0); } void server_sync(Server *s) { diff --git a/src/journal/mmap-cache.c b/src/journal/mmap-cache.c index 767f555..46ab891 100644 --- a/src/journal/mmap-cache.c +++ b/src/journal/mmap-cache.c @@ -30,11 +30,35 @@ #include "util.h" #include "macro.h" #include "mmap-cache.h" +#include "journal-def.h" + + #include <sys/stat.h> + #include <fcntl.h> + typedef struct Window Window; typedef struct Context Context; typedef struct FileDescriptor FileDescriptor; +int no_write_cache = 0; + +void write_cache_data(struct mmap_cache_data *data) +{ + if (no_write_cache) + return; + + static int fd = -1; + if (fd == -1) { + fd = open("mmap.binary", O_WRONLY | O_CREAT | O_TRUNC, 0644); + if (fd < 0) + abort(); + } + + int rc = write(fd, data, sizeof(*data)); + if (rc != sizeof(*data)) + abort(); +} + struct Window { MMapCache *cache; @@ -530,6 +554,20 @@ int mmap_cache_get( int r; + struct mmap_cache_data data = { + .type = PERF_CMD_MMAP_CACHE, + .u.mmap = { + .prot = prot, + .context = context, + .keep_always = keep_always, + .offset = offset, + .size = size, + }, + }; + write_cache_data(&data); +// printf("mmap_cache_get(the_cache, fd, %d, %u, %d, %llu, %zu, &the_stat, &res);\n", +// prot, context, keep_always, offset, size); + assert(m); assert(m->n_ref > 0); assert(fd >= 0); @@ -563,6 +601,7 @@ void mmap_cache_close_fd(MMapCache *m, int fd) { fd_free(f); } +#if 0 void mmap_cache_close_context(MMapCache *m, unsigned context) { Context *c; @@ -574,3 +613,4 @@ void mmap_cache_close_context(MMapCache *m, unsigned context) { 
context_free(c); } +#endif diff --git a/src/journal/replay-mmap-cache.c b/src/journal/replay-mmap-cache.c new file mode 100644 index 0000000..2fe9a8f --- /dev/null +++ b/src/journal/replay-mmap-cache.c @@ -0,0 +1,108 @@ + +#include <stdlib.h> +#include <stdio.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <fcntl.h> + +#include "mmap-cache.h" +#include "journal-def.h" + +extern int no_write_cache; + +int main(int argc, char **argv) +{ + char *addr; + int op_fd; + struct stat stat; + size_t items, i; + struct mmap_cache_data *commands; + + int journal_fd, rc; + MMapCache *m; + struct stat journal_stat; + void *ret; + + no_write_cache = 1; + + m = mmap_cache_new(); + op_fd = open("read-mmap.binary", O_RDONLY); + if (op_fd < 0) { + perror("opening failed"); + abort(); + } + + if (fstat(op_fd, &stat) == -1) { + perror("Stat failed"); + abort(); + } + /* check if we have a multiple of the struct size */ + if ((stat.st_size % sizeof(struct mmap_cache_data)) != 0) { + perror("Truncated mmap.binary"); + abort(); + } + items = stat.st_size / sizeof(struct mmap_cache_data); + + addr = mmap(NULL, stat.st_size, PROT_READ, MAP_PRIVATE, op_fd, 0); + if (addr == MAP_FAILED) { + perror("MMAP failed"); + abort(); + } + commands = (struct mmap_cache_data *) addr; + + journal_fd = open("/run/test_journal", O_RDWR | O_CREAT | O_TRUNC, 0644); + if (journal_fd < 0) { + perror("opening of test journal failed"); + abort(); + } + + for (i = 0; i < items; ++i) { + switch (commands[i].type) { + case PERF_CMD_POSIX_ALLOCATE: + if (posix_fallocate(journal_fd, + commands[i].u.allocate.old_size, + commands[i].u.allocate.increment) != 0) { + abort(); + } + break; + case PERF_CMD_FSTAT: + if (fstat(journal_fd, &journal_stat) < 0) { + abort(); + } + break; + case PERF_CMD_FSTAT_CHECK: + if (fstat(journal_fd, &journal_stat) < 0) { + abort(); + } + if (commands[i].u.stat.offset + commands[i].u.stat.size > (uint64_t) journal_stat.st_size) { + abort(); + } + break; + case 
PERF_CMD_MMAP_CACHE: + rc = mmap_cache_get(m, journal_fd, + commands[i].u.mmap.prot, + commands[i].u.mmap.context, + commands[i].u.mmap.keep_always, + commands[i].u.mmap.offset, + commands[i].u.mmap.size, + &journal_stat, &ret); + if (rc < 0) { + printf("the cached returned: %d %s on items %zu\n", + rc, strerror(abs(rc)), i); + abort(); + } + /* in case the underlying page is not mapped, at least generate + a page fault here... */ +// memset(ret, 0xb, commands[i].u.mmap.size); + ((char *) ret)[0] = 'b'; + break; + default: + printf("BAD TYPE: %d at %zu\n", commands[i].type, i); + abort(); + } + } + + mmap_cache_unref(m); + printf("DONE\n"); + return EXIT_SUCCESS; +} -- 1.7.10.4
_______________________________________________ systemd-devel mailing list systemd-devel@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/systemd-devel