[PATCH v9 3/3]: perf record: extend trace writing to multi AIO

2018-10-05 Thread Alexey Budankov


Multi AIO trace writing allows caching more kernel data into userspace
memory, postponing trace writing for the sake of an overall increase in
profiling data throughput. It can be seen as an extension of the kernel
data buffer into userspace memory.

With the aio-cblocks option set to a value other than 0 (the current
default), the tool can cache more data in userspace while delegating
the spill to AIO.

That avoids suspending in record__aio_sync() between calls of
record__mmap_read_evlist() and increases profiling data throughput at
the cost of userspace memory.

Signed-off-by: Alexey Budankov 
---
Changes in v10:
- added description of aio-cblocks option into perf-record.txt
---
 tools/perf/Documentation/perf-record.txt |  6 +++
 tools/perf/builtin-record.c  | 58 +++-
 tools/perf/util/evlist.c |  6 +--
 tools/perf/util/evlist.h |  2 +-
 tools/perf/util/mmap.c   | 76 ++--
 tools/perf/util/mmap.h   |  7 +--
 6 files changed, 115 insertions(+), 40 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt 
b/tools/perf/Documentation/perf-record.txt
index 246dee081efd..7f43a05908ed 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -435,6 +435,12 @@ Specify vmlinux path which has debuginfo.
 --buildid-all::
 Record build-id of all DSOs regardless whether it's actually hit or not.
 
+--aio-cblocks <n>::
+Use <n> control blocks for asynchronous (POSIX AIO) trace writing (max: 4).
+The default value is 0, which selects serial (non-AIO) trace writing mode.
+AIO mode is supported only when the perf tool binary is linked with a libc
+library that provides an implementation of the POSIX AIO API.
+
 --all-kernel::
 Configure all used events to run in kernel space.
 
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index b2d38cb52650..f857edd4b905 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -196,16 +196,35 @@ static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
 	return rc;
 }
 
-static void record__aio_sync(struct perf_mmap *md)
+static int record__aio_sync(struct perf_mmap *md, bool sync_all)
 {
-	struct aiocb *cblock = &md->cblock;
+	struct aiocb **aiocb = md->aiocb;
+	struct aiocb *cblocks = md->cblocks;
 	struct timespec timeout = { 0, 1000 * 1000 * 1 }; // 1ms
+	int i, do_suspend;
 
 	do {
-		if (cblock->aio_fildes == -1 || record__aio_complete(md, cblock))
-			return;
+		do_suspend = 0;
+		for (i = 0; i < md->nr_cblocks; ++i) {
+			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
+				if (sync_all)
+					aiocb[i] = NULL;
+				else
+					return i;
+			} else {
+				/*
+				 * The started aio write is not complete yet,
+				 * so it has to be waited on before the next
+				 * allocation.
+				 */
+				aiocb[i] = &cblocks[i];
+				do_suspend = 1;
+			}
+		}
+		if (!do_suspend)
+			return -1;
 
-		while (aio_suspend((const struct aiocb **)&cblock, 1, &timeout)) {
+		while (aio_suspend((const struct aiocb **)aiocb, md->nr_cblocks, &timeout)) {
 			if (!(errno == EAGAIN || errno == EINTR))
 				pr_err("failed to sync perf data, error: %m\n");
 		}
@@ -435,11 +454,14 @@ static int record__mmap_evlist(struct record *rec,
 			       struct perf_evlist *evlist)
 {
 	struct record_opts *opts = &rec->opts;
+	int nr_cblocks = 0;
 	char msg[512];
-
+#ifdef HAVE_AIO_SUPPORT
+	nr_cblocks = opts->nr_cblocks;
+#endif
 	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
 				 opts->auxtrace_mmap_pages,
-				 opts->auxtrace_snapshot_mode) < 0) {
+				 opts->auxtrace_snapshot_mode, nr_cblocks) < 0) {
 		if (errno == EPERM) {
 			pr_err("Permission error mapping pages.\n"
 			       "Consider increasing "
@@ -637,7 +659,7 @@ static void record__mmap_read_sync(struct record *rec)
 		struct perf_mmap *map = &evlist->mmap[i];
 
 		if (map->base)
-			record__aio_sync(map);
+			record__aio_sync(map, true);
 	}
 }
 #endif
@@ -673,12 +695,13 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist)
 	}
 #ifdef HAVE_AIO_SUPPORT
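
The multi control block scheme in the record__aio_sync() hunk above can be
exercised in isolation. Below is a minimal standalone sketch against the
plain POSIX AIO API, not the perf code itself: keep an array of control
blocks, hand out any block whose write has completed, and aio_suspend() on
the in-flight set only when every block is busy, which mirrors what
record__aio_sync() does per mmap. NR_CBLOCKS, sync_one(), the chunk sizes
and the output file name are all illustrative; on glibc, build with
something like cc -o aio-sketch aio-sketch.c -lrt.

#include <aio.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define NR_CBLOCKS 4
#define BUF_SZ 4096

static struct aiocb cblocks[NR_CBLOCKS];
static const struct aiocb *inflight[NR_CBLOCKS];
static char bufs[NR_CBLOCKS][BUF_SZ];

/* Return the index of a control block that is free for reuse. */
static int sync_one(void)
{
	struct timespec timeout = { 0, 1000 * 1000 }; /* 1ms, as in the patch */
	int i;

	for (;;) {
		for (i = 0; i < NR_CBLOCKS; i++) {
			if (cblocks[i].aio_fildes == -1)   /* never started */
				return i;
			if (aio_error(&cblocks[i]) != EINPROGRESS) {
				aio_return(&cblocks[i]);   /* reap the result */
				cblocks[i].aio_fildes = -1;
				return i;
			}
			inflight[i] = &cblocks[i];         /* still writing */
		}
		/* All blocks are busy: sleep until any one completes. */
		while (aio_suspend(inflight, NR_CBLOCKS, &timeout))
			if (errno != EAGAIN && errno != EINTR)
				perror("aio_suspend");
	}
}

int main(void)
{
	int fd = open("trace.out", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	int i, n;

	if (fd < 0) {
		perror("open");
		return 1;
	}
	for (i = 0; i < NR_CBLOCKS; i++)
		cblocks[i].aio_fildes = -1;

	for (n = 0; n < 16; n++) {	/* 16 chunks, at most 4 in flight */
		i = sync_one();
		memset(bufs[i], 'a' + (n % 26), BUF_SZ);
		memset(&cblocks[i], 0, sizeof(cblocks[i]));
		cblocks[i].aio_fildes = fd;
		cblocks[i].aio_buf = bufs[i];
		cblocks[i].aio_nbytes = BUF_SZ;
		cblocks[i].aio_offset = (off_t)n * BUF_SZ;
		cblocks[i].aio_sigevent.sigev_notify = SIGEV_NONE;
		if (aio_write(&cblocks[i]))
			perror("aio_write");
	}

	for (i = 0; i < NR_CBLOCKS; i++) {	/* drain before exiting */
		if (cblocks[i].aio_fildes == -1)
			continue;
		while (aio_error(&cblocks[i]) == EINPROGRESS)
			usleep(1000);
		aio_return(&cblocks[i]);
	}
	close(fd);
	return 0;
}

With NR_CBLOCKS set to 1 this degenerates to the pre-patch behavior: each
write must complete before its buffer can be reused, which is exactly the
suspend the commit message above wants to avoid.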
   


Re: [PATCH v9 3/3]: perf record: extend trace writing to multi AIO

2018-10-05 Thread Alexey Budankov
Hi,

On 05.10.2018 10:22, Namhyung Kim wrote:
> On Wed, Oct 03, 2018 at 07:17:01PM +0300, Alexey Budankov wrote:

>> @@ -1833,6 +1861,10 @@ static struct option __record_options[] = {
>>  		    "signal"),
>>  	OPT_BOOLEAN(0, "dry-run", &dry_run,
>>  		    "Parse options then exit"),
>> +#ifdef HAVE_AIO_SUPPORT
>> +	OPT_INTEGER(0, "aio-cblocks", &nr_cblocks,
>> +		    "Max number of simultaneous per-mmap trace writes (default: 0 - serial, max: 4)"),
>> +#endif
>>  	OPT_END()
>>  };
> 
> Please update the documentation for the new option and config

Accepted.

> variable.  Perhaps it can provide the option always and check the
> availability at runtime (with a warning message if not enabled).

Not sure this complication is really required.
So far AIO can be disabled at compile time via the NO_AIO define.

Thanks,
Alexey
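
A sketch of what the suggested runtime check might look like; this is
hypothetical code, not part of the posted series. The helper name
record__fixup_aio() and the stubbed types are invented for illustration;
only nr_cblocks and HAVE_AIO_SUPPORT come from the patch itself:

/* Hypothetical sketch of the suggested fallback, with stubbed perf types. */
#include <stdio.h>

struct record_opts { int nr_cblocks; };
struct record { struct record_opts opts; };

#define pr_warning(...) fprintf(stderr, __VA_ARGS__)

/* Would run early in cmd_record(), after option parsing. */
static void record__fixup_aio(struct record *rec)
{
#ifndef HAVE_AIO_SUPPORT
	if (rec->opts.nr_cblocks > 0) {
		pr_warning("asynchronous trace writing is not supported "
			   "in this build, falling back to serial mode\n");
		rec->opts.nr_cblocks = 0;
	}
#else
	(void)rec;
#endif
}

int main(void)
{
	struct record rec = { .opts = { .nr_cblocks = 4 } };

	record__fixup_aio(&rec);
	printf("nr_cblocks = %d\n", rec.opts.nr_cblocks);
	return 0;
}

As posted, the series instead compiles the option out entirely behind
HAVE_AIO_SUPPORT and relies on the NO_AIO compile-time knob.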



Re: [PATCH v9 3/3]: perf record: extend trace writing to multi AIO

2018-10-05 Thread Namhyung Kim
On Wed, Oct 03, 2018 at 07:17:01PM +0300, Alexey Budankov wrote:
> 
> Multi AIO trace writing allows caching more kernel data into userspace
> memory, postponing trace writing for the sake of an overall increase in
> profiling data throughput. It can be seen as an extension of the kernel
> data buffer into userspace memory.
> 
> With the aio-cblocks option set to a value other than 0 (the current
> default), the tool can cache more data in userspace while delegating
> the spill to AIO.
> 
> That avoids suspending in record__aio_sync() between calls of
> record__mmap_read_evlist() and increases profiling data throughput at
> the cost of userspace memory.
> 
> Signed-off-by: Alexey Budankov 
> ---
>  tools/perf/builtin-record.c | 59 +++--
>  tools/perf/util/evlist.c|  7 ++--
>  tools/perf/util/evlist.h|  3 +-
>  tools/perf/util/mmap.c  | 79 -
>  tools/perf/util/mmap.h  |  7 ++--
>  5 files changed, 115 insertions(+), 40 deletions(-)
> 
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index c63fe8c021a2..7b67574be5b7 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -196,16 +196,35 @@ static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
>  	return rc;
>  }
>  
> -static void record__aio_sync(struct perf_mmap *md)
> +static int record__aio_sync(struct perf_mmap *md, bool sync_all)
>  {
> -	struct aiocb *cblock = &md->cblock;
> +	struct aiocb **aiocb = md->aiocb;
> +	struct aiocb *cblocks = md->cblocks;
>  	struct timespec timeout = { 0, 1000 * 1000 * 1 }; // 1ms
> +	int i, do_suspend;
>  
>  	do {
> -		if (cblock->aio_fildes == -1 || record__aio_complete(md, cblock))
> -			return;
> +		do_suspend = 0;
> +		for (i = 0; i < md->nr_cblocks; ++i) {
> +			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
> +				if (sync_all)
> +					aiocb[i] = NULL;
> +				else
> +					return i;
> +			} else {
> +				/*
> +				 * The started aio write is not complete yet,
> +				 * so it has to be waited on before the next
> +				 * allocation.
> +				 */
> +				aiocb[i] = &cblocks[i];
> +				do_suspend = 1;
> +			}
> +		}
> +		if (!do_suspend)
> +			return -1;
>  
> -		while (aio_suspend((const struct aiocb **)&cblock, 1, &timeout)) {
> +		while (aio_suspend((const struct aiocb **)aiocb, md->nr_cblocks, &timeout)) {
>  			if (!(errno == EAGAIN || errno == EINTR))
>  				pr_err("failed to sync perf data, error: %m\n");
>  		}
> @@ -436,11 +455,15 @@ static int record__mmap_evlist(struct record *rec,
>  			       struct perf_evlist *evlist)
>  {
>  	struct record_opts *opts = &rec->opts;
> +	int nr_cblocks = 0;
>  	char msg[512];
> -
> +#ifdef HAVE_AIO_SUPPORT
> +	nr_cblocks = opts->nr_cblocks;
> +#endif
>  	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
>  				 opts->auxtrace_mmap_pages,
> -				 opts->auxtrace_snapshot_mode) < 0) {
> +				 opts->auxtrace_snapshot_mode,
> +				 nr_cblocks) < 0) {
>  		if (errno == EPERM) {
>  			pr_err("Permission error mapping pages.\n"
>  			       "Consider increasing "
> @@ -637,7 +660,7 @@ static void record__mmap_read_sync(struct record *rec __maybe_unused)
>  	for (i = 0; i < evlist->nr_mmaps; i++) {
>  		struct perf_mmap *map = &evlist->mmap[i];
>  		if (map->base)
> -			record__aio_sync(map);
> +			record__aio_sync(map, true);
>  	}
>  #endif
>  }
> @@ -676,12 +699,13 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist)
>  				goto out;
>  			}
>  		} else {
> +			int idx;
>  			/*
>  			 * Call record__aio_sync() to wait till map->data buffer
>  			 * becomes available after previous aio write request.
>  			 */
> -			record__aio_sync(map);
> -			if (perf_mmap__aio_push(map, rec, record__aio_pushfn) != 0) {
> +			idx = record__aio_sync(map, false);
> +			if (perf_mmap__aio_push(map, rec, idx,
> 


[PATCH v9 3/3]: perf record: extend trace writing to multi AIO

2018-10-03 Thread Alexey Budankov


Multi AIO trace writing allows caching more kernel data into userspace
memory, postponing trace writing for the sake of an overall increase in
profiling data throughput. It can be seen as an extension of the kernel
data buffer into userspace memory.

With the aio-cblocks option set to a value other than 0 (the current
default), the tool can cache more data in userspace while delegating
the spill to AIO.

That avoids suspending in record__aio_sync() between calls of
record__mmap_read_evlist() and increases profiling data throughput at
the cost of userspace memory.

Signed-off-by: Alexey Budankov 
---
 tools/perf/builtin-record.c | 59 +++--
 tools/perf/util/evlist.c|  7 ++--
 tools/perf/util/evlist.h|  3 +-
 tools/perf/util/mmap.c  | 79 -
 tools/perf/util/mmap.h  |  7 ++--
 5 files changed, 115 insertions(+), 40 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index c63fe8c021a2..7b67574be5b7 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -196,16 +196,35 @@ static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
 	return rc;
 }
 
-static void record__aio_sync(struct perf_mmap *md)
+static int record__aio_sync(struct perf_mmap *md, bool sync_all)
 {
-	struct aiocb *cblock = &md->cblock;
+	struct aiocb **aiocb = md->aiocb;
+	struct aiocb *cblocks = md->cblocks;
 	struct timespec timeout = { 0, 1000 * 1000 * 1 }; // 1ms
+	int i, do_suspend;
 
 	do {
-		if (cblock->aio_fildes == -1 || record__aio_complete(md, cblock))
-			return;
+		do_suspend = 0;
+		for (i = 0; i < md->nr_cblocks; ++i) {
+			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
+				if (sync_all)
+					aiocb[i] = NULL;
+				else
+					return i;
+			} else {
+				/*
+				 * The started aio write is not complete yet,
+				 * so it has to be waited on before the next
+				 * allocation.
+				 */
+				aiocb[i] = &cblocks[i];
+				do_suspend = 1;
+			}
+		}
+		if (!do_suspend)
+			return -1;
 
-		while (aio_suspend((const struct aiocb **)&cblock, 1, &timeout)) {
+		while (aio_suspend((const struct aiocb **)aiocb, md->nr_cblocks, &timeout)) {
 			if (!(errno == EAGAIN || errno == EINTR))
 				pr_err("failed to sync perf data, error: %m\n");
 		}
@@ -436,11 +455,15 @@ static int record__mmap_evlist(struct record *rec,
 			       struct perf_evlist *evlist)
 {
 	struct record_opts *opts = &rec->opts;
+	int nr_cblocks = 0;
 	char msg[512];
-
+#ifdef HAVE_AIO_SUPPORT
+	nr_cblocks = opts->nr_cblocks;
+#endif
 	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
 				 opts->auxtrace_mmap_pages,
-				 opts->auxtrace_snapshot_mode) < 0) {
+				 opts->auxtrace_snapshot_mode,
+				 nr_cblocks) < 0) {
 		if (errno == EPERM) {
 			pr_err("Permission error mapping pages.\n"
 			       "Consider increasing "
@@ -637,7 +660,7 @@ static void record__mmap_read_sync(struct record *rec __maybe_unused)
 	for (i = 0; i < evlist->nr_mmaps; i++) {
 		struct perf_mmap *map = &evlist->mmap[i];
 		if (map->base)
-			record__aio_sync(map);
+			record__aio_sync(map, true);
 	}
 #endif
 }
@@ -676,12 +699,13 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist)
 				goto out;
 			}
 		} else {
+			int idx;
 			/*
 			 * Call record__aio_sync() to wait till map->data buffer
 			 * becomes available after previous aio write request.
 			 */
-			record__aio_sync(map);
-			if (perf_mmap__aio_push(map, rec, record__aio_pushfn) != 0) {
+			idx = record__aio_sync(map, false);
+			if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn) != 0) {
 				rc = -1;
 				goto out;
 			}
@@ -1442,6 +1466,10 @@ static
