Re: [PATCH v4 02/12] perf record: introduce thread specific data array

2021-04-09 Thread Bayduraev, Alexey V
Hi,

On 08.04.2021 17:16, Andi Kleen wrote:
>> +} else {
>> +thread_data[t].tid = syscall(SYS_gettid);
> 
> That always fills in the tid of the setup thread instead of the target
> threads?
> 

Here we set the tid only for thread 0 (the one for cpu 0); the tids of the
other threads are set up later in record__thread(), see [PATCH v4 05/12].

Regards,
Alexey


Re: [PATCH v4 02/12] perf record: introduce thread specific data array

2021-04-08 Thread Andi Kleen
> + } else {
> + thread_data[t].tid = syscall(SYS_gettid);

That always fills in the tid of the setup thread instead of the target
threads?


[PATCH v4 02/12] perf record: introduce thread specific data array

2021-04-06 Thread Bayduraev, Alexey V


Introduce thread specific data object and array of such objects
to store and manage thread local data. Implement functions to
allocate, initialize, finalize and release thread specific data.

The thread-local maps and overwrite_maps arrays hold pointers to the
mmap buffer objects that the thread serves, as selected by its maps
thread mask. The thread-local pollfd array holds the event fds
connected to those mmap buffers, also selected by the maps thread mask.

Thread control commands are delivered via thread local comm pipes
and ctlfd_pos fd. External control commands (--control option)
are delivered via evlist ctlfd_pos fd and handled by the main
tool thread.

Signed-off-by: Alexey Bayduraev 
---
 tools/lib/api/fd/array.c|  17 
 tools/lib/api/fd/array.h|   1 +
 tools/perf/builtin-record.c | 194 +++-
 3 files changed, 209 insertions(+), 3 deletions(-)

diff --git a/tools/lib/api/fd/array.c b/tools/lib/api/fd/array.c
index 5e6cb9debe37..de8bcbaea3f1 100644
--- a/tools/lib/api/fd/array.c
+++ b/tools/lib/api/fd/array.c
@@ -88,6 +88,23 @@ int fdarray__add(struct fdarray *fda, int fd, short revents, 
enum fdarray_flags
return pos;
 }
 
+/*
+ * fdarray__clone - append a copy of one entry of @base to @fda
+ * @fda:  destination array; the entry is added through fdarray__add()
+ * @pos:  index of the entry in @base to copy
+ * @base: source array
+ *
+ * Returns the position of the new entry in @fda, -EINVAL if @pos is not a
+ * valid index into @base, or the negative value from fdarray__add() when
+ * the entry could not be added.
+ */
+int fdarray__clone(struct fdarray *fda, int pos, struct fdarray *base)
+{
+	struct pollfd *entry;
+	int npos;
+
+	/* pos is signed: a negative value would index before base->entries. */
+	if (pos < 0 || pos >= base->nr)
+		return -EINVAL;
+
+	entry = &base->entries[pos];
+
+	npos = fdarray__add(fda, entry->fd, entry->events, base->priv[pos].flags);
+	/* fdarray__add() only receives the flags; copy the whole priv slot too. */
+	if (npos >= 0)
+		fda->priv[npos] = base->priv[pos];
+
+	return npos;
+}
+
 int fdarray__filter(struct fdarray *fda, short revents,
void (*entry_destructor)(struct fdarray *fda, int fd, void 
*arg),
void *arg)
diff --git a/tools/lib/api/fd/array.h b/tools/lib/api/fd/array.h
index 7fcf21a33c0c..4a03da7f1fc1 100644
--- a/tools/lib/api/fd/array.h
+++ b/tools/lib/api/fd/array.h
@@ -42,6 +42,7 @@ struct fdarray *fdarray__new(int nr_alloc, int nr_autogrow);
 void fdarray__delete(struct fdarray *fda);
 
 int fdarray__add(struct fdarray *fda, int fd, short revents, enum 
fdarray_flags flags);
+int fdarray__clone(struct fdarray *fda, int pos, struct fdarray *base);
 int fdarray__poll(struct fdarray *fda, int timeout);
 int fdarray__filter(struct fdarray *fda, short revents,
void (*entry_destructor)(struct fdarray *fda, int fd, void 
*arg),
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index e0cbf05d255c..d74fea2d1ca9 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -56,6 +56,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #ifdef HAVE_EVENTFD_SUPPORT
@@ -90,6 +91,23 @@ struct thread_mask {
struct mmap_cpu_maskaffinity;
 };
 
+struct thread_data { /* per-thread state; struct record keeps an array of these in ->thread_data */
+   pid_t   tid; /* thread id, filled from syscall(SYS_gettid) */
+   struct thread_mask  *mask; /* thread mask; its maps bitmap determines nr_mmaps */
+   struct { /* thread-local communication pipes, each pair created with pipe() */
+   int msg[2]; /* NOTE(review): presumably carries control commands to the thread - confirm */
+   int ack[2]; /* NOTE(review): presumably carries acknowledgements back - confirm */
+   } pipes;
+   struct fdarray  pollfd; /* thread-local pollfd array of event fds connected to the mmap buffers */
+   int ctlfd_pos; /* NOTE(review): presumably the position of the control fd in pollfd - confirm */
+   struct mmap **maps; /* array of nr_mmaps pointers to the mmap buffer objects this thread serves */
+   struct mmap **overwrite_maps; /* pointers to overwrite mmap buffer objects served by this thread */
+   int nr_mmaps; /* number of bits set in mask->maps */
+   struct record   *rec; /* NOTE(review): presumably a back-pointer to the owning record - confirm */
+   unsigned long long  samples; /* NOTE(review): usage not visible in this chunk - presumably a per-thread sample counter */
+   unsigned long   waking; /* NOTE(review): usage not visible in this chunk - presumably a per-thread wakeup counter */
+};
+
 struct record {
struct perf_tooltool;
struct record_opts  opts;
@@ -115,6 +133,7 @@ struct record {
struct mmap_cpu_maskaffinity_mask;
unsigned long   output_max_size;/* = 0: unlimited */
struct thread_mask  *thread_masks;
+   struct thread_data  *thread_data;
int nr_threads;
 };
 
@@ -845,9 +864,172 @@ static int record__kcore_copy(struct machine *machine, 
struct perf_data *data)
return kcore_copy(from_dir, kcore_dir);
 }
 
+/*
+ * Create the msg and ack pipe pairs of one thread_data object.
+ *
+ * Returns 0 on success and -ENOMEM if either pipe() call fails. When the
+ * ack pair cannot be created, the already opened msg pair is closed and its
+ * slots are reset to -1, so no descriptor is leaked and a later cleanup
+ * pass does not see stale fd numbers.
+ */
+static int record__thread_data_init_pipes(struct thread_data *thread_data)
+{
+	int *msg = thread_data->pipes.msg;
+	int *ack = thread_data->pipes.ack;
+
+	if (pipe(msg))
+		goto out_err;
+
+	if (pipe(ack)) {
+		/* close() may clobber errno, which pr_err() prints via %m. */
+		int saved_errno = errno;
+
+		close(msg[0]);
+		close(msg[1]);
+		msg[0] = -1;
+		msg[1] = -1;
+		errno = saved_errno;
+		goto out_err;
+	}
+
+	pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
+		  msg[0], msg[1], ack[0], ack[1]);
+
+	return 0;
+
+out_err:
+	pr_err("Failed to create thread communication pipes, error %m\n");
+	return -ENOMEM;
+}
+
+static int record__thread_data_init_maps(struct thread_data *thread_data, 
struct evlist *evlist)
+{
+   int m, tm, nr_mmaps = evlist->core.nr_mmaps;
+   struct mmap *mmap = evlist->mmap;
+   struct mmap *overwrite_mmap = evlist->overwrite_mmap;
+   struct perf_cpu_map *cpus = evlist->core.cpus;
+
+   thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits, 
thread_data->mask->maps.nbits);
+   if (mmap) {
+   thread_data->maps = zalloc(thread_data->nr_mmaps * 
sizeof(struct mmap *));
+   if (!thread_data->maps) {
+