Re: [PATCH v9 3/3]: perf record: extend trace writing to multi AIO

2018-10-05 Thread Alexey Budankov
Hi,

On 05.10.2018 10:22, Namhyung Kim wrote:
> On Wed, Oct 03, 2018 at 07:17:01PM +0300, Alexey Budankov wrote:

>> @@ -1833,6 +1861,10 @@ static struct option __record_options[] = {
>>  		    "signal"),
>>  	OPT_BOOLEAN(0, "dry-run", &dry_run,
>>  		    "Parse options then exit"),
>> +#ifdef HAVE_AIO_SUPPORT
>> +	OPT_INTEGER(0, "aio-cblocks", &record.opts.nr_cblocks,
>> +		    "Max number of simultaneous per-mmap trace writes (default: 0 - serial, max: 4)"),
>> +#endif
>>  	OPT_END()
>>  };
> 
> Please update the documentation for the new option and config
> variable.

Accepted.
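
Something along these lines for tools/perf/Documentation/perf-record.txt
(exact wording illustrative, derived from the option's help string):

--aio-cblocks=::
	Max number of simultaneous per-mmap trace writes
	(default: 0 - serial, max: 4). Available only when perf
	is built with AIO support (i.e. without NO_AIO=1).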

> Perhaps it can provide the option always and check the
> availability at runtime (with a warning message if not enabled).

I'm not sure this complication is really required. So far, AIO support
can be disabled at compile time with the NO_AIO define.
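
For reference, the runtime-check alternative could look roughly like the
sketch below (record__aio_cblocks_parse is an illustrative name and the
message text is made up; this is not code from the patch):

/*
 * Sketch only: register --aio-cblocks unconditionally and reject it
 * at option-parse time when AIO support was compiled out (NO_AIO).
 */
static int record__aio_cblocks_parse(const struct option *opt,
				     const char *str __maybe_unused,
				     int unset __maybe_unused)
{
#ifdef HAVE_AIO_SUPPORT
	int *nr_cblocks = opt->value;

	*nr_cblocks = unset ? 0 : atoi(str);
	return 0;
#else
	pr_err("--aio-cblocks is not available: perf was built with NO_AIO\n");
	return -1;
#endif
}

	/* in __record_options[], outside of any #ifdef: */
	OPT_CALLBACK(0, "aio-cblocks", &record.opts.nr_cblocks, "n",
		     "Max number of simultaneous per-mmap trace writes",
		     record__aio_cblocks_parse),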

Thanks,
Alexey


Re: [PATCH v9 3/3]: perf record: extend trace writing to multi AIO

2018-10-05 Thread Namhyung Kim
On Wed, Oct 03, 2018 at 07:17:01PM +0300, Alexey Budankov wrote:
> 
> Multi AIO trace writing allows caching more kernel data in userspace
> memory, postponing trace writing for the sake of an overall increase
> in profiling data throughput. It can be seen as an extension of the
> kernel data buffer into userspace memory.
> 
> With an aio-cblocks option value different from 0 (the current
> default), the tool can cache more data in user space while delegating
> the spill to AIO.
> 
> That avoids suspending at record__aio_sync() between calls of
> record__mmap_read_evlist() and increases profiling data throughput at
> the cost of userspace memory.
> 
> Signed-off-by: Alexey Budankov 
> ---
>  tools/perf/builtin-record.c | 59 +++--
>  tools/perf/util/evlist.c    |  7 ++--
>  tools/perf/util/evlist.h    |  3 +-
>  tools/perf/util/mmap.c      | 79 -
>  tools/perf/util/mmap.h      |  7 ++--
>  5 files changed, 115 insertions(+), 40 deletions(-)
> 
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index c63fe8c021a2..7b67574be5b7 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -196,16 +196,35 @@ static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
>  	return rc;
>  }
>  
> -static void record__aio_sync(struct perf_mmap *md)
> +static int record__aio_sync(struct perf_mmap *md, bool sync_all)
>  {
> -	struct aiocb *cblock = &md->cblock;
> +	struct aiocb **aiocb = md->aiocb;
> +	struct aiocb *cblocks = md->cblocks;
>  	struct timespec timeout = { 0, 1000 * 1000 * 1 }; // 1ms
> +	int i, do_suspend;
>  
>  	do {
> -		if (cblock->aio_fildes == -1 || record__aio_complete(md, cblock))
> -			return;
> +		do_suspend = 0;
> +		for (i = 0; i < md->nr_cblocks; ++i) {
> +			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
> +				if (sync_all)
> +					aiocb[i] = NULL;
> +				else
> +					return i;
> +			} else {
> +				/*
> +				 * Started aio write is not complete yet
> +				 * so it has to be waited before the
> +				 * next allocation.
> +				 */
> +				aiocb[i] = &cblocks[i];
> +				do_suspend = 1;
> +			}
> +		}
> +		if (!do_suspend)
> +			return -1;
>  
> -		while (aio_suspend((const struct aiocb **)&cblock, 1, &timeout)) {
> +		while (aio_suspend((const struct aiocb **)aiocb, md->nr_cblocks, &timeout)) {
>  			if (!(errno == EAGAIN || errno == EINTR))
>  				pr_err("failed to sync perf data, error: %m\n");
>  		}
> @@ -436,11 +455,15 @@ static int record__mmap_evlist(struct record *rec,
>  			       struct perf_evlist *evlist)
>  {
>  	struct record_opts *opts = &rec->opts;
> +	int nr_cblocks = 0;
>  	char msg[512];
> -
> +#ifdef HAVE_AIO_SUPPORT
> +	nr_cblocks = opts->nr_cblocks;
> +#endif
>  	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
>  				 opts->auxtrace_mmap_pages,
> -				 opts->auxtrace_snapshot_mode) < 0) {
> +				 opts->auxtrace_snapshot_mode,
> +				 nr_cblocks) < 0) {
>  		if (errno == EPERM) {
>  			pr_err("Permission error mapping pages.\n"
>  			       "Consider increasing "
> @@ -637,7 +660,7 @@ static void record__mmap_read_sync(struct record *rec __maybe_unused)
>  	for (i = 0; i < evlist->nr_mmaps; i++) {
>  		struct perf_mmap *map = &maps[i];
>  		if (map->base)
> -			record__aio_sync(map);
> +			record__aio_sync(map, true);
>  	}
>  #endif
>  }
> @@ -676,12 +699,13 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
>  				goto out;
>  			}
>  		} else {
> +			int idx;
>  			/*
>  			 * Call record__aio_sync() to wait till map->data buffer
>  			 * becomes available after previous aio write request.
>  			 */
> -			record__aio_sync(map);
> -			if (perf_mmap__aio_push(map, rec, record__aio_pushfn) != 0) {
> +			idx = record__aio_sync(map, false);
> +			if (perf_mmap__aio_push(map, rec, idx, 

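The core scheme in the patch (keep up to nr_cblocks AIO writes in flight
per mmap, hand out a free control block when one is available, and
aio_suspend() only when all are busy) can be illustrated with a minimal
standalone POSIX AIO sketch. It is illustrative only: the file name,
chunk size, and helper names below are invented, it mirrors only the
sync_all == false path of record__aio_sync(), and it links with -lrt:

#include <aio.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define NR_CBLOCKS 4
#define CHUNK_SIZE 4096

static struct aiocb cblocks[NR_CBLOCKS];
static char bufs[NR_CBLOCKS][CHUNK_SIZE];

/*
 * Mirror of record__aio_sync(md, false): return the index of a free
 * control block, reaping completed writes along the way; when every
 * block is busy, suspend until at least one completes, then rescan.
 */
static int get_free_cblock(void)
{
	struct timespec timeout = { 0, 1000 * 1000 }; /* 1ms */
	const struct aiocb *in_flight[NR_CBLOCKS];
	int i;

	for (;;) {
		for (i = 0; i < NR_CBLOCKS; i++) {
			/* Unused slot, or a write that has completed. */
			if (cblocks[i].aio_fildes == -1 ||
			    aio_error(&cblocks[i]) != EINPROGRESS) {
				if (cblocks[i].aio_fildes != -1)
					aio_return(&cblocks[i]); /* reap it */
				return i;
			}
			in_flight[i] = &cblocks[i];
		}
		/* Every block is busy: wait for at least one completion. */
		while (aio_suspend(in_flight, NR_CBLOCKS, &timeout)) {
			if (errno != EAGAIN && errno != EINTR)
				perror("aio_suspend");
		}
	}
}

int main(void)
{
	int fd = open("trace.out", O_CREAT | O_WRONLY | O_TRUNC, 0644);
	int i;

	if (fd < 0) {
		perror("open");
		return 1;
	}
	for (i = 0; i < NR_CBLOCKS; i++)
		cblocks[i].aio_fildes = -1;

	for (i = 0; i < 16; i++) {
		int idx = get_free_cblock(); /* may block, like record__aio_sync() */

		memset(bufs[idx], 'A' + i, CHUNK_SIZE);
		cblocks[idx] = (struct aiocb){
			.aio_fildes = fd,
			.aio_buf    = bufs[idx],
			.aio_nbytes = CHUNK_SIZE,
			.aio_offset = (off_t)i * CHUNK_SIZE,
		};
		cblocks[idx].aio_sigevent.sigev_notify = SIGEV_NONE;
		if (aio_write(&cblocks[idx])) {
			perror("aio_write");
			cblocks[idx].aio_fildes = -1;
		}
	}
	/* Drain all outstanding writes before exiting. */
	for (i = 0; i < NR_CBLOCKS; i++)
		while (cblocks[i].aio_fildes != -1 &&
		       aio_error(&cblocks[i]) == EINPROGRESS)
			usleep(1000);
	close(fd);
	return 0;
}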