This perf tool patch adds the ability to filter monitoring based on container
groups (cgroups) for both perf stat and perf record. The cgroups to monitor are
passed via a new -G option followed by a list of cgroups.

The cgroup filesystem has to be mounted. The tool will find it automatically,
open the right file and pass the descriptor to perf_events.

In this fourth version, we fix the case where -G is used but no event is
specified. In that case, the cgroup constraint is applied to all default events.

Example:
$ perf stat -B -a -e cycles:u,cycles:u,cycles:u -G test1,,test2 -- sleep 1
 Performance counter stats for 'sleep 1':

      2,368,667,414  cycles                   test1
      2,369,661,459  cycles                  
      <not counted>  cycles                   test2

        1.001856890  seconds time elapsed

Signed-off-by: Stephane Eranian <eran...@google.com>

---

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 3ee27dc..0f9b8c8 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -116,6 +116,12 @@ Do not update the builid cache. This saves some overhead in situations
 where the information in the perf.data file (which includes buildids)
 is sufficient.
 
+-G name::
+--cgroup name::
+monitor only in the container called "name". This option is available only in per-cpu
+mode. The cgroup filesystem must be mounted. All threads belonging to container "name"
+are monitored when they run on the monitored CPUs.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 4b3a2d4..4115f77 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -53,6 +53,11 @@ comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2
 In per-thread mode, this option is ignored. The -a option is still necessary
 to activate system-wide monitoring. Default is to count on all CPUs.
 
+-G name::
+--cgroup name::
+monitor only in the container called "name". This option is available only in per-cpu
+mode. The cgroup filesystem must be mounted. All threads belonging to container "name"
+are monitored when they run on the monitored CPUs.
 EXAMPLES
 --------
 
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index d1db0f6..7b27c49 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -417,6 +417,7 @@ LIB_H += util/probe-finder.h
 LIB_H += util/probe-event.h
 LIB_H += util/pstack.h
 LIB_H += util/cpumap.h
+LIB_H += util/cgroup.h
 
 LIB_OBJS += $(OUTPUT)util/abspath.o
 LIB_OBJS += $(OUTPUT)util/alias.o
@@ -464,6 +465,7 @@ LIB_OBJS += $(OUTPUT)util/hist.o
 LIB_OBJS += $(OUTPUT)util/probe-event.o
 LIB_OBJS += $(OUTPUT)util/util.o
 LIB_OBJS += $(OUTPUT)util/cpumap.o
+LIB_OBJS += $(OUTPUT)util/cgroup.o
 
 BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
 
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index ff77b80..c1b330a 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -22,6 +22,7 @@
 #include "util/session.h"
 #include "util/symbol.h"
 #include "util/cpumap.h"
+#include "util/cgroup.h"
 
 #include <unistd.h>
 #include <sched.h>
@@ -229,6 +230,8 @@ static void create_counter(int counter, int cpu)
        char *filter = filters[counter];
        struct perf_event_attr *attr = attrs + counter;
        struct perf_header_attr *h_attr;
+       unsigned long flags = 0;
+       int pid;
        int track = !counter; /* only the first counter needs these */
        int thread_index;
        int ret;
@@ -286,6 +289,9 @@ static void create_counter(int counter, int cpu)
                attr->sample_type       |= PERF_SAMPLE_CPU;
        }
 
+       if (cgroups[counter])
+               flags = PERF_FLAG_PID_CGROUP;
+
        attr->mmap              = track;
        attr->comm              = track;
        attr->inherit           = !no_inherit;
@@ -296,8 +302,13 @@ static void create_counter(int counter, int cpu)
 
        for (thread_index = 0; thread_index < thread_num; thread_index++) {
 try_again:
+               if (cgroups[counter])
+                       pid = cgroups_fd[counter];
+               else
+                       pid = all_tids[thread_index];
+
                fd[nr_cpu][counter][thread_index] = sys_perf_event_open(attr,
-                               all_tids[thread_index], cpu, group_fd, 0);
+                               pid, cpu, group_fd, flags);
 
                if (fd[nr_cpu][counter][thread_index] < 0) {
                        int err = errno;
@@ -828,6 +839,9 @@ static const struct option options[] = {
                    "don't sample"),
        OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid,
                    "do not update the buildid cache"),
+       OPT_CALLBACK('G', "cgroup", NULL, "name",
+                    "monitor in cgroup name only",
+                    parse_cgroups),
        OPT_END()
 };
 
@@ -851,6 +865,9 @@ int cmd_record(int argc, const char **argv, const char 
*prefix __used)
                write_mode = WRITE_FORCE;
        }
 
+       if (nr_cgroups && !system_wide)
+               usage_with_options(record_usage, options);
+
        symbol__init();
        if (no_buildid)
                disable_buildid_cache();
@@ -861,6 +878,9 @@ int cmd_record(int argc, const char **argv, const char 
*prefix __used)
                attrs[0].config = PERF_COUNT_HW_CPU_CYCLES;
        }
 
+       if (open_cgroups())
+               usage_with_options(record_usage, options);
+
        if (target_pid != -1) {
                target_tid = target_pid;
                thread_num = find_all_tid(target_pid, &all_tids);
@@ -870,6 +890,7 @@ int cmd_record(int argc, const char **argv, const char 
*prefix __used)
                        usage_with_options(record_usage, options);
                }
        } else {
+               err = -ENOMEM;
                all_tids=malloc(sizeof(pid_t));
                if (!all_tids)
                        goto out_symbol_exit;
@@ -921,5 +942,6 @@ out_free_fd:
        all_tids = NULL;
 out_symbol_exit:
        symbol__exit();
+       close_cgroups();
        return err;
 }
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a6b4d44..9bde425 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -47,6 +47,7 @@
 #include "util/header.h"
 #include "util/cpumap.h"
 #include "util/thread.h"
+#include "util/cgroup.h"
 
 #include <sys/prctl.h>
 #include <math.h>
@@ -151,6 +152,8 @@ struct stats                        runtime_branches_stats;
 static int create_perf_stat_counter(int counter)
 {
        struct perf_event_attr *attr = attrs + counter;
+       unsigned long flags = 0;
+       int pid = -1;
        int thread;
        int ncreated = 0;
 
@@ -161,9 +164,13 @@ static int create_perf_stat_counter(int counter)
        if (system_wide) {
                int cpu;
 
+               if (cgroups[counter]) {
+                       flags = PERF_FLAG_PID_CGROUP;
+                       pid = cgroups_fd[counter];
+               }
                for (cpu = 0; cpu < nr_cpus; cpu++) {
                        fd[cpu][counter][0] = sys_perf_event_open(attr,
-                                       -1, cpumap[cpu], -1, 0);
+                                       pid, cpumap[cpu], -1, flags);
                        if (fd[cpu][counter][0] < 0)
                                pr_debug(ERR_PERF_OPEN, counter,
                                         fd[cpu][counter][0], strerror(errno));
@@ -398,6 +405,9 @@ static void abs_printout(int counter, double avg)
        else
                fprintf(stderr, " %18.0f  %-24s", avg, event_name(counter));
 
+       if (cgroups[counter])
+               fprintf(stderr, " %s", cgroups[counter]);
+
        if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) {
                total = avg_stats(&runtime_cycles_stats);
 
@@ -433,8 +443,13 @@ static void print_counter(int counter)
        int scaled = event_scaled[counter];
 
        if (scaled == -1) {
-               fprintf(stderr, " %18s  %-24s\n",
+               fprintf(stderr, " %18s  %-24s",
                        "<not counted>", event_name(counter));
+
+               if (cgroups[counter])
+                       fprintf(stderr, " %s", cgroups[counter]);
+
+               fprintf(stderr, "\n");
                return;
        }
 
@@ -454,7 +469,6 @@ static void print_counter(int counter)
                fprintf(stderr, "  (scaled from %.2f%%)",
                                100 * avg_running / avg_enabled);
        }
-
        fprintf(stderr, "\n");
 }
 
@@ -545,6 +559,9 @@ static const struct option options[] = {
                    "print large numbers with thousands\' separators"),
        OPT_STRING('C', "cpu", &cpu_list, "cpu",
                    "list of cpus to monitor in system-wide"),
+       OPT_CALLBACK('G', "cgroup", NULL, "name",
+                    "monitor in cgroup name only",
+                    parse_cgroups),
        OPT_END()
 };
 
@@ -562,10 +579,23 @@ int cmd_stat(int argc, const char **argv, const char 
*prefix __used)
        if (run_count <= 0)
                usage_with_options(stat_usage, options);
 
+       if (nr_cgroups && !system_wide)
+               usage_with_options(stat_usage, options);
+
        /* Set attrs and nr_counters if no event is selected and !null_run */
        if (!null_run && !nr_counters) {
                memcpy(attrs, default_attrs, sizeof(default_attrs));
                nr_counters = ARRAY_SIZE(default_attrs);
+               if (nr_cgroups == 1) {
+                       for (i = 1; i < nr_counters; i++) {
+                               cgroups[i] = strdup(cgroups[0]);
+                               if (!cgroups[i]) {
+                                       close_cgroups();
+                                       return -ENOMEM;
+                               }
+                               nr_cgroups++;
+                       }
+               }
        }
 
        if (system_wide)
@@ -612,6 +642,9 @@ int cmd_stat(int argc, const char **argv, const char 
*prefix __used)
        signal(SIGALRM, skip_signal);
        signal(SIGABRT, skip_signal);
 
+       if (open_cgroups())
+               usage_with_options(stat_usage, options);
+
        status = 0;
        for (run_idx = 0; run_idx < run_count; run_idx++) {
                if (run_count != 1 && verbose)
@@ -622,5 +655,7 @@ int cmd_stat(int argc, const char **argv, const char 
*prefix __used)
        if (status != -1)
                print_stat(argc, argv);
 
+       close_cgroups();
+
        return status;
 }

------------------------------------------------------------------------------
Beautiful is writing same markup. Internet Explorer 9 supports
standards for HTML5, CSS3, SVG 1.1,  ECMAScript5, and DOM L2 & L3.
Spend less time writing and  rewriting code and more time creating great
experiences on the web. Be a part of the beta today.
http://p.sf.net/sfu/beautyoftheweb
_______________________________________________
perfmon2-devel mailing list
perfmon2-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/perfmon2-devel

Reply via email to