On Wed, Oct 03, 2018 at 07:17:01PM +0300, Alexey Budankov wrote:
> 
> Multi AIO trace writing allows caching more kernel data into userspace 
> memory, postponing trace writing for the sake of an overall profiling data 
> throughput increase. It could be seen as a kernel data buffer extension into
> userspace memory.
> 
> With an aio-cblocks option value different from 0, the current default value, 
> the tool has the capability to cache more and more data into user space
> while delegating the spill to AIO.
> 
> That allows avoiding the suspend at record__aio_sync() between calls of 
> record__mmap_read_evlist() and increases profiling data throughput at 
> the cost of userspace memory.
> 
> Signed-off-by: Alexey Budankov <alexey.budan...@linux.intel.com>
> ---
>  tools/perf/builtin-record.c | 59 +++++++++++++++++++++++++++------
>  tools/perf/util/evlist.c    |  7 ++--
>  tools/perf/util/evlist.h    |  3 +-
>  tools/perf/util/mmap.c      | 79 
> ++++++++++++++++++++++++++++++++-------------
>  tools/perf/util/mmap.h      |  7 ++--
>  5 files changed, 115 insertions(+), 40 deletions(-)
> 
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index c63fe8c021a2..7b67574be5b7 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -196,16 +196,35 @@ static int record__aio_complete(struct perf_mmap *md, 
> struct aiocb *cblock)
>       return rc;
>  }
>  
> -static void record__aio_sync(struct perf_mmap *md)
> +static int record__aio_sync(struct perf_mmap *md, bool sync_all)
>  {
> -     struct aiocb *cblock = &md->cblock;
> +     struct aiocb **aiocb = md->aiocb;
> +     struct aiocb *cblocks = md->cblocks;
>       struct timespec timeout = { 0, 1000 * 1000  * 1 }; // 1ms
> +     int i, do_suspend;
>  
>       do {
> -             if (cblock->aio_fildes == -1 || record__aio_complete(md, 
> cblock))
> -                     return;
> +             do_suspend = 0;
> +             for (i = 0; i < md->nr_cblocks; ++i) {
> +                     if (cblocks[i].aio_fildes == -1 || 
> record__aio_complete(md, &cblocks[i])) {
> +                             if (sync_all)
> +                                     aiocb[i] = NULL;
> +                             else
> +                                     return i;
> +                     } else {
> +                             /*
> +                              * Started aio write is not complete yet
> +                              * so it has to be waited before the
> +                              * next allocation.
> +                              */
> +                             aiocb[i] = &cblocks[i];
> +                             do_suspend = 1;
> +                     }
> +             }
> +             if (!do_suspend)
> +                     return -1;
>  
> -             while (aio_suspend((const struct aiocb**)&cblock, 1, &timeout)) 
> {
> +             while (aio_suspend((const struct aiocb **)aiocb, 
> md->nr_cblocks, &timeout)) {
>                       if (!(errno == EAGAIN || errno == EINTR))
>                               pr_err("failed to sync perf data, error: %m\n");
>               }
> @@ -436,11 +455,15 @@ static int record__mmap_evlist(struct record *rec,
>                              struct perf_evlist *evlist)
>  {
>       struct record_opts *opts = &rec->opts;
> +     int nr_cblocks = 0;
>       char msg[512];
> -
> +#ifdef HAVE_AIO_SUPPORT
> +     nr_cblocks = opts->nr_cblocks;
> +#endif
>       if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
>                                opts->auxtrace_mmap_pages,
> -                              opts->auxtrace_snapshot_mode) < 0) {
> +                              opts->auxtrace_snapshot_mode,
> +                              nr_cblocks) < 0) {
>               if (errno == EPERM) {
>                       pr_err("Permission error mapping pages.\n"
>                              "Consider increasing "
> @@ -637,7 +660,7 @@ static void record__mmap_read_sync(struct record *rec 
> __maybe_unused)
>       for (i = 0; i < evlist->nr_mmaps; i++) {
>               struct perf_mmap *map = &maps[i];
>               if (map->base)
> -                     record__aio_sync(map);
> +                     record__aio_sync(map, true);
>       }
>  #endif
>  }
> @@ -676,12 +699,13 @@ static int record__mmap_read_evlist(struct record *rec, 
> struct perf_evlist *evli
>                                       goto out;
>                               }
>                       } else {
> +                             int idx;
>                               /*
>                                * Call record__aio_sync() to wait till 
> map->data buffer
>                                * becomes available after previous aio write 
> request.
>                                */
> -                             record__aio_sync(map);
> -                             if (perf_mmap__aio_push(map, rec, 
> record__aio_pushfn) != 0) {
> +                             idx = record__aio_sync(map, false);
> +                             if (perf_mmap__aio_push(map, rec, idx, 
> record__aio_pushfn) != 0) {
>                                       rc = -1;
>                                       goto out;
>                               }
> @@ -1442,6 +1466,10 @@ static int perf_record_config(const char *var, const 
> char *value, void *cb)
>               var = "call-graph.record-mode";
>               return perf_default_config(var, value, cb);
>       }
> +#ifdef HAVE_AIO_SUPPORT
> +     if (!strcmp(var, "record.aio-cblocks"))
> +             rec->opts.nr_cblocks = strtol(value, NULL, 0);
> +#endif
>  
>       return 0;
>  }
> @@ -1833,6 +1861,10 @@ static struct option __record_options[] = {
>                         "signal"),
>       OPT_BOOLEAN(0, "dry-run", &dry_run,
>                   "Parse options then exit"),
> +#ifdef HAVE_AIO_SUPPORT
> +     OPT_INTEGER(0, "aio-cblocks", &record.opts.nr_cblocks,
> +                 "Max number of simultaneous per-mmap trace writes (default: 
> 0 - serial, max: 4)"),
> +#endif
>       OPT_END()
>  };

Please update the documentation for the new option and config
variable.  Perhaps it can provide the option always and check the
availability at runtime (with a warning message if not enabled).

Thanks,
Namhyung


>  
> @@ -2025,6 +2057,13 @@ int cmd_record(int argc, const char **argv)
>               goto out;
>       }
>  
> +#ifdef HAVE_AIO_SUPPORT
> +     if (rec->opts.nr_cblocks > 4)
> +             rec->opts.nr_cblocks = 4;
> +     if (verbose > 0)
> +             pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
> +#endif
> +
>       err = __cmd_record(&record, argc, argv);
>  out:
>       perf_evlist__delete(rec->evlist);

Reply via email to