Re: [PATCH v9 3/3]: perf record: extend trace writing to multi AIO
Hi, On 05.10.2018 10:22, Namhyung Kim wrote: > On Wed, Oct 03, 2018 at 07:17:01PM +0300, Alexey Budankov wrote: >> @@ -1833,6 +1861,10 @@ static struct option __record_options[] = { >> "signal"), >> OPT_BOOLEAN(0, "dry-run", &dry_run, >> "Parse options then exit"), >> +#ifdef HAVE_AIO_SUPPORT >> +OPT_INTEGER(0, "aio-cblocks", &nr_cblocks, >> +"Max number of simultaneous per-mmap trace writes (default: >> 0 - serial, max: 4)"), >> +#endif >> OPT_END() >> }; > > Please update the documentation for the new option and config > variable. Accepted. > Perhaps it can provide the option always and check the > availability at runtime (with a warning message if not enabled). Not sure if this complication is really required. So far NO_AIO define is possible at compile time. Thanks, Alexey
Re: [PATCH v9 3/3]: perf record: extend trace writing to multi AIO
Hi, On 05.10.2018 10:22, Namhyung Kim wrote: > On Wed, Oct 03, 2018 at 07:17:01PM +0300, Alexey Budankov wrote: >> @@ -1833,6 +1861,10 @@ static struct option __record_options[] = { >> "signal"), >> OPT_BOOLEAN(0, "dry-run", &dry_run, >> "Parse options then exit"), >> +#ifdef HAVE_AIO_SUPPORT >> +OPT_INTEGER(0, "aio-cblocks", &nr_cblocks, >> +"Max number of simultaneous per-mmap trace writes (default: >> 0 - serial, max: 4)"), >> +#endif >> OPT_END() >> }; > > Please update the documentation for the new option and config > variable. Accepted. > Perhaps it can provide the option always and check the > availability at runtime (with a warning message if not enabled). Not sure if this complication is really required. So far NO_AIO define is possible at compile time. Thanks, Alexey
Re: [PATCH v9 3/3]: perf record: extend trace writing to multi AIO
On Wed, Oct 03, 2018 at 07:17:01PM +0300, Alexey Budankov wrote: > > Multi AIO trace writing allows caching more kernel data into userspace > memory postponing trace writing for the sake of overall profiling data > thruput increase. It could be seen as kernel data buffer extension into > userspace memory. > > With aio-cblocks option value different from 0, current default value, > tool has capability to cache more and more data into user space > along with delegating spill to AIO. > > That allows avoiding suspend at record__aio_sync() between calls of > record__mmap_read_evlist() and increase profiling data thruput for > the cost of userspace memory. > > Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com> > --- > tools/perf/builtin-record.c | 59 +++-- > tools/perf/util/evlist.c| 7 ++-- > tools/perf/util/evlist.h| 3 +- > tools/perf/util/mmap.c | 79 > - > tools/perf/util/mmap.h | 7 ++-- > 5 files changed, 115 insertions(+), 40 deletions(-) > > diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c > index c63fe8c021a2..7b67574be5b7 100644 > --- a/tools/perf/builtin-record.c > +++ b/tools/perf/builtin-record.c > @@ -196,16 +196,35 @@ static int record__aio_complete(struct perf_mmap *md, > struct aiocb *cblock) > return rc; > } > > -static void record__aio_sync(struct perf_mmap *md) > +static int record__aio_sync(struct perf_mmap *md, bool sync_all) > { > - struct aiocb *cblock = &md->cblock; > + struct aiocb **aiocb = md->aiocb; > + struct aiocb *cblocks = md->cblocks; > struct timespec timeout = { 0, 1000 * 1000 * 1 }; // 1ms > + int i, do_suspend; > > do { > - if (cblock->aio_fildes == -1 || record__aio_complete(md, > cblock)) > - return; > + do_suspend = 0; > + for (i = 0; i < md->nr_cblocks; ++i) { > + if (cblocks[i].aio_fildes == -1 || > record__aio_complete(md, &cblocks[i])) { > + if (sync_all) > + aiocb[i] = NULL; > + else > + return i; > + } else { > + /* > + * Started aio write is not complete yet > + * so it has to be waited before the > + * next allocation. 
> + */ > + aiocb[i] = &cblocks[i]; > + do_suspend = 1; > + } > + } > + if (!do_suspend) > + return -1; > > - while (aio_suspend((const struct aiocb**)&cblock, 1, &timeout)) > { > + while (aio_suspend((const struct aiocb **)aiocb, > md->nr_cblocks, &timeout)) { > if (!(errno == EAGAIN || errno == EINTR)) > pr_err("failed to sync perf data, error: %m\n"); > } > @@ -436,11 +455,15 @@ static int record__mmap_evlist(struct record *rec, > struct perf_evlist *evlist) > { > struct record_opts *opts = &rec->opts; > + int nr_cblocks = 0; > char msg[512]; > - > +#ifdef HAVE_AIO_SUPPORT > + nr_cblocks = opts->nr_cblocks; > +#endif > if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, > opts->auxtrace_mmap_pages, > - opts->auxtrace_snapshot_mode) < 0) { > + opts->auxtrace_snapshot_mode, > + nr_cblocks) < 0) { > if (errno == EPERM) { > pr_err("Permission error mapping pages.\n" > "Consider increasing " > @@ -637,7 +660,7 @@ static void record__mmap_read_sync(struct record *rec > __maybe_unused) > for (i = 0; i < evlist->nr_mmaps; i++) { > struct perf_mmap *map = &maps[i]; > if (map->base) > - record__aio_sync(map); > + record__aio_sync(map, true); > } > #endif > } > @@ -676,12 +699,13 @@ static int record__mmap_read_evlist(struct record *rec, > struct perf_evlist *evli > goto out; > } > } else { > + int idx; > /* > * Call record__aio_sync() to wait till > map->data buffer > * becomes available after previous aio write > request. > */ > - record__aio_sync(map); > - if (perf_mmap__aio_push(map, rec, > record__aio_pushfn) != 0) { > + idx = record__aio_sync(map, false); > + if (perf_mmap__aio_push(map, rec, idx, >
Re: [PATCH v9 3/3]: perf record: extend trace writing to multi AIO
On Wed, Oct 03, 2018 at 07:17:01PM +0300, Alexey Budankov wrote: > > Multi AIO trace writing allows caching more kernel data into userspace > memory postponing trace writing for the sake of overall profiling data > thruput increase. It could be seen as kernel data buffer extension into > userspace memory. > > With aio-cblocks option value different from 0, current default value, > tool has capability to cache more and more data into user space > along with delegating spill to AIO. > > That allows avoiding suspend at record__aio_sync() between calls of > record__mmap_read_evlist() and increase profiling data thruput for > the cost of userspace memory. > > Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com> > --- > tools/perf/builtin-record.c | 59 +++-- > tools/perf/util/evlist.c| 7 ++-- > tools/perf/util/evlist.h| 3 +- > tools/perf/util/mmap.c | 79 > - > tools/perf/util/mmap.h | 7 ++-- > 5 files changed, 115 insertions(+), 40 deletions(-) > > diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c > index c63fe8c021a2..7b67574be5b7 100644 > --- a/tools/perf/builtin-record.c > +++ b/tools/perf/builtin-record.c > @@ -196,16 +196,35 @@ static int record__aio_complete(struct perf_mmap *md, > struct aiocb *cblock) > return rc; > } > > -static void record__aio_sync(struct perf_mmap *md) > +static int record__aio_sync(struct perf_mmap *md, bool sync_all) > { > - struct aiocb *cblock = &md->cblock; > + struct aiocb **aiocb = md->aiocb; > + struct aiocb *cblocks = md->cblocks; > struct timespec timeout = { 0, 1000 * 1000 * 1 }; // 1ms > + int i, do_suspend; > > do { > - if (cblock->aio_fildes == -1 || record__aio_complete(md, > cblock)) > - return; > + do_suspend = 0; > + for (i = 0; i < md->nr_cblocks; ++i) { > + if (cblocks[i].aio_fildes == -1 || > record__aio_complete(md, &cblocks[i])) { > + if (sync_all) > + aiocb[i] = NULL; > + else > + return i; > + } else { > + /* > + * Started aio write is not complete yet > + * so it has to be waited before the > + * next allocation. 
> + */ > + aiocb[i] = &cblocks[i]; > + do_suspend = 1; > + } > + } > + if (!do_suspend) > + return -1; > > - while (aio_suspend((const struct aiocb**)&cblock, 1, &timeout)) > { > + while (aio_suspend((const struct aiocb **)aiocb, > md->nr_cblocks, &timeout)) { > if (!(errno == EAGAIN || errno == EINTR)) > pr_err("failed to sync perf data, error: %m\n"); > } > @@ -436,11 +455,15 @@ static int record__mmap_evlist(struct record *rec, > struct perf_evlist *evlist) > { > struct record_opts *opts = &rec->opts; > + int nr_cblocks = 0; > char msg[512]; > - > +#ifdef HAVE_AIO_SUPPORT > + nr_cblocks = opts->nr_cblocks; > +#endif > if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, > opts->auxtrace_mmap_pages, > - opts->auxtrace_snapshot_mode) < 0) { > + opts->auxtrace_snapshot_mode, > + nr_cblocks) < 0) { > if (errno == EPERM) { > pr_err("Permission error mapping pages.\n" > "Consider increasing " > @@ -637,7 +660,7 @@ static void record__mmap_read_sync(struct record *rec > __maybe_unused) > for (i = 0; i < evlist->nr_mmaps; i++) { > struct perf_mmap *map = &maps[i]; > if (map->base) > - record__aio_sync(map); > + record__aio_sync(map, true); > } > #endif > } > @@ -676,12 +699,13 @@ static int record__mmap_read_evlist(struct record *rec, > struct perf_evlist *evli > goto out; > } > } else { > + int idx; > /* > * Call record__aio_sync() to wait till > map->data buffer > * becomes available after previous aio write > request. > */ > - record__aio_sync(map); > - if (perf_mmap__aio_push(map, rec, > record__aio_pushfn) != 0) { > + idx = record__aio_sync(map, false); > + if (perf_mmap__aio_push(map, rec, idx, >