This is just a prototype.

The previous patch enables sharing a hardware PMU among perf_events within
the same perf_event_context. This sharing comes with the limitation that a
per-CPU event cannot share a hardware PMU with a per-task event. This
limitation becomes a blocker when certain events can only use a specific PMU,
for example, ref-cycles on some Intel CPUs. The following two commands will
not share the PMU (when run in parallel):

   perf stat -e ref-cycles -I 1000
   perf stat -e ref-cycles -I 1000 --pid <pid>

This patch shows a prototype that solves this problem with cgroup events.
With this patch, the following two commands can share the PMU:

   perf stat -e ref-cycles -I 1000
   perf stat -e ref-cycles -I 1000 --pid <pid> --create-cgroup

The second command creates a cgroup for the pid and moves the pid into
that cgroup. Then a cgroup event (instead of a task event) is created
to monitor the process.

Alternatively, we can also create a mechanism in the kernel that is very
similar to cgroup perf events. I am also open to other suggestions.

Signed-off-by: Song Liu <[email protected]>
Cc: Tejun Heo <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Jiri Olsa <[email protected]>
---
 tools/perf/builtin-stat.c | 26 ++++++++++++++
 tools/perf/util/cgroup.c  | 76 +++++++++++++++++++++++++++++++++++++++
 tools/perf/util/cgroup.h  |  5 +++
 tools/perf/util/target.h  |  1 +
 4 files changed, 108 insertions(+)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index b86aba1c8028..66a4da2d506e 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -646,6 +646,17 @@ static void print_counters(struct timespec *ts, int argc, 
const char **argv)
                                    ts, argc, argv);
 }

+/*
+ * Tear down the temporary "perf.<pid>" cgroup when --create-cgroup was
+ * given; otherwise do nothing.
+ *
+ * NOTE(review): this is also called from skip_signal(). If that runs in
+ * signal context, the work done by cgroup__cleanup() (fopen, getline,
+ * fprintf) is not async-signal-safe -- confirm before merging.
+ */
+static void cleanup(void)
+{
+       char cgrp_name[32];
+
+       if (!target.create_cgroup)
+               return;
+
+       /* Same "perf.<own pid>" name that cmd_stat() used to create it. */
+       scnprintf(cgrp_name, 31, "perf.%u", getpid());
+       cgroup__cleanup(cgrp_name);
+}
+
 static volatile int signr = -1;

 static void skip_signal(int signo)
@@ -661,6 +672,7 @@ static void skip_signal(int signo)
         * and fast PID recycling
         */
        child_pid = -1;
+       cleanup();
 }

 static void sig_atexit(void)
@@ -725,6 +737,8 @@ static const struct option stat_options[] = {
                   "stat events on existing process id"),
        OPT_STRING('t', "tid", &target.tid, "tid",
                   "stat events on existing thread id"),
+       OPT_BOOLEAN(0, "create-cgroup", &target.create_cgroup,
+                   "create a cgroup for the pid/tid"),
        OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
                    "system-wide collection from all CPUs"),
        OPT_BOOLEAN('g', "group", &group,
@@ -1607,6 +1621,17 @@ int cmd_stat(int argc, const char **argv)
        perf_stat__collect_metric_expr(evsel_list);
        perf_stat__init_shadow_stats();

+       if (target.create_cgroup) {
+               char name[32];
+
+               scnprintf(name, 31, "perf.%u", getpid());
+               cgroup__create(name);
+               cgroup__add_pid(name, strtoul(target.pid, NULL, 0));
+
+               cgroup__add_evlist(name, evsel_list);
+               target.pid = NULL;
+       }
+
        if (stat_config.csv_sep) {
                stat_config.csv_output = true;
                if (!strcmp(stat_config.csv_sep, "\\t"))
@@ -1906,5 +1931,6 @@ int cmd_stat(int argc, const char **argv)

        runtime_stat_delete(&stat_config);

+       cleanup();
        return status;
 }
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index ccd02634a616..f3e706f6fa96 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -249,3 +249,79 @@ int parse_cgroups(const struct option *opt, const char 
*str,
        }
        return 0;
 }
+
+/*
+ * Thin wrapper around add_cgroup(): forwards @evlist and the cgroup
+ * @name and hands back whatever add_cgroup() returns.
+ */
+int cgroup__add_evlist(const char *name, struct perf_evlist *evlist)
+{
+       int err = add_cgroup(evlist, name);
+
+       return err;
+}
+
+/*
+ * Create the directory <cgroupfs mountpoint>/<name>.
+ *
+ * Returns -1 when cgroupfs_find_mountpoint() reports failure, otherwise
+ * the result of mkdir() with mode 0755.
+ */
+int cgroup__create(const char *name)
+{
+       char mountpoint[PATH_MAX + 1];
+       char dir[PATH_MAX + 1];
+
+       if (cgroupfs_find_mountpoint(mountpoint, PATH_MAX + 1) != 0)
+               return -1;
+
+       scnprintf(dir, PATH_MAX, "%s/%s", mountpoint, name);
+
+       return mkdir(dir, 0755);
+}
+
+/*
+ * Move process @pid into cgroup @name by writing the pid to
+ * <cgroupfs mountpoint>/<name>/cgroup.procs.
+ *
+ * Returns 0 on success, -1 if the mountpoint cannot be found, the
+ * cgroup.procs file cannot be opened, or the write fails.
+ */
+int cgroup__add_pid(const char *name, pid_t pid)
+{
+       char path[PATH_MAX + 1];
+       char mnt[PATH_MAX + 1];
+       char buf[32];
+       int ret = 0;
+       int fd;
+
+       if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1))
+               return -1;
+
+       scnprintf(path, PATH_MAX, "%s/%s/cgroup.procs", mnt, name);
+       fd = open(path, O_WRONLY);
+       if (fd < 0)
+               return -1;
+
+       /* pid_t is a signed type, so format it with %d rather than %u. */
+       scnprintf(buf, sizeof(buf), "%d", (int)pid);
+       if (write(fd, buf, strlen(buf)) < 0) {
+               fprintf(stderr, "Error writing %s to %s\n", buf, path);
+               /* Report the failure instead of claiming the move worked. */
+               ret = -1;
+       }
+
+       close(fd);
+       return ret;
+}
+
+/*
+ * Remove cgroup @name: copy every line (one pid each) of
+ * <mountpoint>/<name>/cgroup.procs into <mountpoint>/cgroup.procs, then
+ * rmdir() the cgroup directory.
+ *
+ * Returns -1 if the mountpoint cannot be found or either cgroup.procs
+ * file cannot be opened; otherwise the result of rmdir(). Failed writes
+ * of individual pids are reported on stderr but do not stop the loop.
+ */
+int cgroup__cleanup(const char *name)
+{
+       char path[PATH_MAX + 1];
+       char mnt[PATH_MAX + 1];
+       /* getline() requires *lineptr to be NULL or a malloc()ed buffer. */
+       char *line = NULL;
+       size_t len = 0;
+       FILE *fp;
+       int fd;
+
+       if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1))
+               return -1;
+
+       scnprintf(path, PATH_MAX, "%s/%s/cgroup.procs", mnt, name);
+       fp = fopen(path, "r");
+       if (fp == NULL)
+               return -1;
+
+       /* From here on, path names the destination (root) cgroup.procs. */
+       scnprintf(path, PATH_MAX, "%s/cgroup.procs", mnt);
+       fd = open(path, O_WRONLY);
+       if (fd < 0) {
+               fclose(fp);
+               return -1;
+       }
+
+       /* One write() per line, i.e. one pid per write(). */
+       while (getline(&line, &len, fp) != -1) {
+               if (write(fd, line, strlen(line)) < 0)
+                       fprintf(stderr, "Error writing %s to %s\n", line, path);
+       }
+       /* getline() allocated the buffer (even on failure); release it. */
+       free(line);
+       close(fd);
+       fclose(fp);
+
+       scnprintf(path, PATH_MAX, "%s/%s", mnt, name);
+       return rmdir(path);
+}
diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h
index f033a80c1b14..7bdd8d99d130 100644
--- a/tools/perf/util/cgroup.h
+++ b/tools/perf/util/cgroup.h
@@ -26,4 +26,9 @@ void evlist__set_default_cgroup(struct perf_evlist *evlist, 
struct cgroup *cgrou

 int parse_cgroups(const struct option *opt, const char *str, int unset);

+int cgroup__create(const char *name);
+int cgroup__cleanup(const char *name);
+int cgroup__add_pid(const char *name, pid_t pid);
+int cgroup__add_evlist(const char *name, struct perf_evlist *evlist);
+
 #endif /* __CGROUP_H__ */
diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h
index 6ef01a83b24e..03c9ac06660a 100644
--- a/tools/perf/util/target.h
+++ b/tools/perf/util/target.h
@@ -15,6 +15,7 @@ struct target {
        bool         uses_mmap;
        bool         default_per_cpu;
        bool         per_thread;
+       bool         create_cgroup;
 };

 enum target_errno {
--
2.17.1

Reply via email to