Corey,

I reworked your patch a little bit. I promoted certain helper functions 
to perf_util.h.

I did not really like the extra max_fds parameter to the perf_setup_*()
functions; I thought it was too much to carry around. Instead, I stash
this value in the first fds[] entry (fds[0].max_fds). I kept num_fds as a
parameter so that the return value is purely an error code. I also
updated the patch to the latest HEAD.

I also updated the x86-specific examples.

Let me know what you think.

Thanks.


diff --git a/perf_examples/notify_group.c b/perf_examples/notify_group.c
index 9192df2..598be72 100644
--- a/perf_examples/notify_group.c
+++ b/perf_examples/notify_group.c
@@ -44,7 +44,7 @@ typedef struct {
 static volatile unsigned long notification_received;
 
 static perf_event_desc_t *fds;
-static int num_events;
+static int num_fds;
 
 static int buffer_pages = 1; /* size of buffer payload  (must be power of 2) */
 
@@ -56,7 +56,7 @@ sigio_handler(int n, struct siginfo *info, struct sigcontext 
*sc)
        uint64_t ip;
        int id, ret;
        
-       id = perf_fd2event(fds, num_events, info->si_fd);
+       id = perf_fd2event(fds, num_fds, info->si_fd);
        if (id == -1)
                errx(1, "cannot find event for descriptor %d", info->si_fd);
 
@@ -129,15 +129,15 @@ main(int argc, char **argv)
        /*
         * allocates fd for us
         */
-       num_events = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES,"
+       ret = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES,"
                                       "PERF_COUNT_HW_CPU_CYCLES,"
                                        "PERF_COUNT_HW_CPU_CYCLES",
-                                       &fds);
-       if (num_events < 1)
+                                       &fds, &num_fds);
+       if (ret || !num_fds)
                exit(1);
 
        fds[0].fd = -1;
-       for(i=0; i < num_events; i++) {
+       for(i=0; i < num_fds; i++) {
                /* want a notification for each sample added to the buffer */
                fds[i].hw.disabled =  !!i;
                printf("i=%d disabled=%d\n", i, fds[i].hw.disabled);
@@ -180,7 +180,7 @@ main(int argc, char **argv)
                fds[i].pgmsk = (buffer_pages * pgsz) - 1;
        }
 
-       for(i=0; i < num_events; i++) {
+       for(i=0; i < num_fds; i++) {
                ret = ioctl(fds[i].fd, PERF_EVENT_IOC_REFRESH , 1);
                if (ret == -1)
                        err(1, "cannot refresh");
@@ -194,7 +194,7 @@ error:
        /*
         * destroy our session
         */
-       for(i=0; i < num_events; i++)
+       for(i=0; i < num_fds; i++)
                close(fds[i].fd);
 
        free(fds);
diff --git a/perf_examples/notify_self.c b/perf_examples/notify_self.c
index 12dcfc9..ceee2cd 100644
--- a/perf_examples/notify_self.c
+++ b/perf_examples/notify_self.c
@@ -40,8 +40,8 @@
 
 static volatile unsigned long notification_received;
 
-static perf_event_desc_t *fds;
-static int num_events;
+static perf_event_desc_t *fds = NULL;
+static int num_fds = 0;
 
 static int buffer_pages = 1; /* size of buffer payload (must be power of 2)*/
 
@@ -79,7 +79,7 @@ print_sample(int id)
                if (ret)
                        errx(1, "cannot read grp");
 
-               e = perf_id2event(fds, num_events, grp.id);
+               e = perf_id2event(fds, num_fds, grp.id);
                if (e == -1)
                        str = "unknown event";
                else
@@ -119,7 +119,7 @@ sigio_handler(int n, struct siginfo *info, void *uc)
        if (info->si_code != POLL_HUP)
                errx(1, "signal not generated by SIGIO");
 
-       id = perf_fd2event(fds, num_events, info->si_fd);
+       id = perf_fd2event(fds, num_fds, info->si_fd);
        if (id == -1)
                errx(1, "no event associated with fd=%d", info->si_fd);
 
@@ -188,14 +188,14 @@ main(int argc, char **argv)
        /*
         * allocates fd for us
         */
-       num_events = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES,"
+       ret = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES,"
                                       "PERF_COUNT_HW_INSTRUCTIONS",
-                                       &fds);
-       if (num_events < 1)
+                                       &fds, &num_fds);
+       if (ret || (num_fds == 0))
                exit(1);
 
        fds[0].fd = -1;
-       for(i=0; i < num_events; i++) {
+       for(i=0; i < num_fds; i++) {
 
                /* want a notification for every each added to the buffer */
                fds[i].hw.disabled = !i;
@@ -213,7 +213,7 @@ main(int argc, char **argv)
                        err(1, "cannot attach event %s", fds[i].name);
        }
        
-       sz = (3+2*num_events)*sizeof(uint64_t);
+       sz = (3+2*num_fds)*sizeof(uint64_t);
        val = malloc(sz);
        if (!val)
                err(1, "cannot allocated memory");
@@ -245,7 +245,7 @@ main(int argc, char **argv)
         * We are skipping the first 3 values (nr, time_enabled, time_running)
         * and then for each event we get a pair of values.
         */ 
-       for(i=0; i < num_events; i++) {
+       for(i=0; i < num_fds; i++) {
                fds[i].id = val[2*i+1+3];
                printf("%"PRIu64"  %s\n", fds[i].id, fds[i].name);
        }
@@ -295,7 +295,7 @@ main(int argc, char **argv)
        /*
         * destroy our session
         */
-       for(i=0; i < num_events; i++)
+       for(i=0; i < num_fds; i++)
                close(fds[i].fd);
 
        free(fds);
diff --git a/perf_examples/perf_util.c b/perf_examples/perf_util.c
index 4afc978..9c99b15 100644
--- a/perf_examples/perf_util.c
+++ b/perf_examples/perf_util.c
@@ -31,69 +31,86 @@
 #include <perfmon/pfmlib_perf_event.h>
 #include "perf_util.h"
 
+/* *fds must be NULL on the first call; a non-NULL *fds is extended in
+ * place, using *num_fds and the capacity stashed in fds[0].max_fds.
+ * The return value is success (0) vs. failure (non-zero)
+ */
 int
-perf_setup_argv_events(const char **argv, perf_event_desc_t **fd)
+perf_setup_argv_events(const char **argv, perf_event_desc_t **fds, int 
*num_fds)
 {
-       perf_event_desc_t *fdt = NULL;
-       int num = 0, max_fd = 0, new_max;
-       int ret;
+       perf_event_desc_t *fd;
+       int new_max, ret, num, max_fds;
+       int group_leader;
 
-       if (!(argv && fd))
+       if (!(argv && fds && num_fds))
                return -1;
 
-       
-       while(*argv) {
-               if (num == max_fd) {
+       fd = *fds;
+       if (fd) {
+               max_fds = fd[0].max_fds;
+               if (max_fds < 2)
+                       return -1;
+               num = *num_fds;
+       } else {
+               max_fds = num = 0; /* bootstrap */
+       }
+       group_leader = num;
 
-                       if (!max_fd)
+       while(*argv) {
+               if (num == max_fds) {
+                       if (max_fds == 0)
                                new_max = 2;
                        else
-                               new_max = max_fd << 1;
+                               new_max = max_fds << 1;
 
-                       if (new_max < max_fd) {
+                       if (new_max < max_fds) {
                                warn("too many entries");
                                goto error;
                        }
-                       fdt = realloc(fdt, new_max * sizeof(*fdt));
-                       if (!fdt) {
+                       fd = realloc(fd, new_max * sizeof(*fd));
+                       if (!fd) {
                                warn("cannot allocate memory");
                                goto error;
                        }
                        /* reset newly allocated chunk */
-                       memset(fdt+max_fd, 0, (new_max - max_fd) * 
sizeof(*fdt));
-                       max_fd = new_max;
+                       memset(fd + max_fds, 0, (new_max - max_fds) * 
sizeof(*fd));
+                       max_fds = new_max;
+
+                       /* update max size */
+                       fd[0].max_fds = max_fds;
                }
 
-               ret = pfm_get_perf_event_encoding(*argv, PFM_PLM3, 
&fdt[num].hw, NULL, NULL);
+               ret = pfm_get_perf_event_encoding(*argv, PFM_PLM3, &fd[num].hw, 
NULL, NULL);
                if (ret != PFM_SUCCESS) {
                        warnx("event %s: %s\n", *argv, pfm_strerror(ret));
                        goto error;
                }
                /* ABI compatibility */
-               fdt[num].hw.size = sizeof(struct perf_event_attr);
-               fdt[num].name = *argv;
+               fd[num].hw.size = sizeof(struct perf_event_attr);
+
+               fd[num].name = *argv;
+               fd[num].group_leader = group_leader;
                num++;
                argv++;
        }
-       *fd = fdt;
-       return num;
+       *num_fds = num;
+       *fds = fd;
+       return 0;
 error:
-       free(fdt);
+       free(fd);
        return -1;
 }
 
 int
-perf_setup_list_events(const char *ev, perf_event_desc_t **fd)
+perf_setup_list_events(const char *ev, perf_event_desc_t **fd, int *num_fds)
 {
        const char **argv;
-       char *p, *q;
-       char *events;
-       int num = 0, i, ret;
+       char *p, *q, *events;
+       int i, ret, num = 0;
 
-       if (!(ev && fd))
+       if (!(ev && fd && num_fds))
                return -1;
 
-       
        events = strdup(ev);
        if (!events)
                return -1;
@@ -105,7 +122,7 @@ perf_setup_list_events(const char *ev, perf_event_desc_t 
**fd)
        }
        num++;
        num++; /* terminator */
-  
+
        argv = malloc(num * sizeof(char *));
        if (!argv) {
                free(events);
@@ -119,12 +136,35 @@ perf_setup_list_events(const char *ev, perf_event_desc_t 
**fd)
        }
        argv[i++] = q;
        argv[i] = NULL;
-       ret = perf_setup_argv_events(argv, fd);
+       ret = perf_setup_argv_events(argv, fd, num_fds);
        free(argv);
        return ret;
 }
 
 int
+perf_get_group_nevents(perf_event_desc_t *fds, int num, int idx)
+{
+       int leader, i;
+
+       /* Return how many events are in the group that fds[idx] belongs
+        * to, or 0 when idx is outside [0, num).
+        */
+       if (idx < 0 || idx >= num)
+               return 0;
+
+       leader = fds[idx].group_leader;
+       /* Count from the leader up to the first entry that names another
+        * leader. (Assumes a group's members sit right after its leader.)
+        */
+       for (i = leader + 1; i < num; i++) {
+               if (fds[i].group_leader != leader)
+                       break;
+       }
+       /* i == num here when the group is the last one in fds[] */
+       return i - leader;
+}
+
+int
 perf_read_buffer(struct perf_event_mmap_page *hdr, size_t pgmsk, void *buf, 
size_t sz)
 {
        char *data;
diff --git a/perf_examples/perf_util.h b/perf_examples/perf_util.h
index 8f5222f..c65c003 100644
--- a/perf_examples/perf_util.h
+++ b/perf_examples/perf_util.h
@@ -36,14 +36,16 @@ typedef struct {
        uint64_t id; /* event id kernel */
        void *buf;
        size_t pgmsk;
+       int group_leader;
        int fd;
+       int max_fds;
 } perf_event_desc_t;
 
 /* handy shortcut */
 #define PERF_FORMAT_SCALE 
(PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING)
 
-extern int perf_setup_argv_events(const char **argv, perf_event_desc_t **fd);
-extern int perf_setup_list_events(const char *events, perf_event_desc_t **fd);
+extern int perf_setup_argv_events(const char **argv, perf_event_desc_t **fd, 
int *num_fds);
+extern int perf_setup_list_events(const char *events, perf_event_desc_t **fd, 
int *num_fds);
 extern int perf_read_buffer(struct perf_event_mmap_page *hdr, size_t pgmsk, 
void *buf, size_t sz);
 extern void perf_skip_buffer(struct perf_event_mmap_page *hdr, size_t sz);
 
@@ -109,4 +111,13 @@ perf_id2event(perf_event_desc_t *fds, int num_events, 
uint64_t id)
                        return j;
        return -1;
 }
+
+static inline int /* non-zero iff fds[idx] is its own group's leader */
+perf_is_group_leader(perf_event_desc_t *fds, int idx)
+{
+       return fds[idx].group_leader == idx; /* idx is not range-checked */
+}
+
+extern int perf_get_group_nevents(perf_event_desc_t *fds, int num, int leader);
+
 #endif
diff --git a/perf_examples/self.c b/perf_examples/self.c
index 12ebcd4..11e3d85 100644
--- a/perf_examples/self.c
+++ b/perf_examples/self.c
@@ -65,9 +65,9 @@ noploop(void)
 int
 main(int argc, char **argv)
 {
-       perf_event_desc_t *fds;
+       perf_event_desc_t *fds = NULL;
        uint64_t values[3];
-       int i, ret, num;
+       int i, ret, num_fds = 0;
 
        setlocale(LC_ALL, "");
        /*
@@ -77,12 +77,12 @@ main(int argc, char **argv)
        if (ret != PFM_SUCCESS)
                errx(1, "Cannot initialize library: %s", pfm_strerror(ret));
 
-       num = perf_setup_argv_events(argc > 1 ? (const char **)argv+1 : 
gen_events, &fds);
-       if (num == -1)
+       ret = perf_setup_argv_events(argc > 1 ? (const char **)argv+1 : 
gen_events, &fds, &num_fds);
+       if (ret || !num_fds)
                errx(1, "cannot setup events");
 
        fds[0].fd = -1;
-       for(i=0; i < num; i++) {
+       for(i=0; i < num_fds; i++) {
                /* request timing information necessary for scaling */
                fds[i].hw.read_format = PERF_FORMAT_SCALE;
 
@@ -97,8 +97,8 @@ main(int argc, char **argv)
        signal(SIGALRM, sig_handler);
 
        /*
-        * enable all counters attached to this thread and created by it
-        */
+        * enable all counters attached to this thread and created by it
+        */
        ret = prctl(PR_TASK_PERF_EVENTS_ENABLE);
        if (ret)
                err(1, "prctl(enable) failed");
@@ -108,8 +108,8 @@ main(int argc, char **argv)
        noploop();
 
        /*
-        * disable all counters attached to this thread
-        */
+        * disable all counters attached to this thread
+        */
        ret = prctl(PR_TASK_PERF_EVENTS_DISABLE);
        if (ret)
                err(1, "prctl(disable) failed");
@@ -121,7 +121,7 @@ main(int argc, char **argv)
         */
        memset(values, 0, sizeof(values));
 
-       for (i=0; i < num; i++) {
+       for (i=0; i < num_fds; i++) {
                uint64_t val;
                double ratio;
 
diff --git a/perf_examples/self_count.c b/perf_examples/self_count.c
index 48855fd..c77456d 100644
--- a/perf_examples/self_count.c
+++ b/perf_examples/self_count.c
@@ -179,10 +179,10 @@ read_count(perf_event_desc_t *fds)
 int
 main(int argc, char **argv)
 {
-       perf_event_desc_t *fds;
+       perf_event_desc_t *fds = NULL;
        size_t pgsz;
        uint64_t val;
-       int i, ret, num;
+       int i, ret, num_fds = 0;
        int n = 30;
 
        pgsz = sysconf(_SC_PAGESIZE);
@@ -193,12 +193,12 @@ main(int argc, char **argv)
        if (ret != PFM_SUCCESS)
                errx(1, "Cannot initialize library: %s", pfm_strerror(ret));
 
-       num = perf_setup_argv_events(argc > 1 ? (const char **)(argv+1) : 
gen_events, &fds);
-       if (num == -1)
+       ret = perf_setup_argv_events(argc > 1 ? (const char **)argv+1 : 
gen_events, &fds, &num_fds);
+       if (ret || !num_fds)
                errx(1, "cannot setup events");
 
        fds[0].fd = -1;
-       for(i=0; i < num; i++) {
+       for(i=0; i < num_fds; i++) {
                /* request timing information necesaary for scaling */
                fds[i].hw.read_format = PERF_FORMAT_SCALE;
                fds[i].hw.disabled = 0;
@@ -215,15 +215,15 @@ main(int argc, char **argv)
        signal(SIGALRM, sig_handler);
 
        /*
-        * enable all counters attached to this thread
-        */
+        * enable all counters attached to this thread
+        */
        ioctl(fds[0].fd, PERF_EVENT_IOC_ENABLE, 0);
 
        alarm(10);
 
        for(;quit == 0;) {
-               
-               for (i=0; i < num; i++) {
+
+               for (i=0; i < num_fds; i++) {
                        val = read_count(&fds[i]);
                        printf("%20"PRIu64" %s\n", val, fds[i].name);
                }
@@ -233,11 +233,11 @@ main(int argc, char **argv)
                        n = 30;
        }
        /*
-        * disable all counters attached to this thread
-        */
+        * disable all counters attached to this thread
+        */
        ioctl(fds[0].fd, PERF_EVENT_IOC_DISABLE, 0);
 
-       for (i=0; i < num; i++) {
+       for (i=0; i < num_fds; i++) {
                munmap(fds[i].buf, pgsz);
                close(fds[i].fd);
        }
diff --git a/perf_examples/self_pipe.c b/perf_examples/self_pipe.c
index 8fb919a..90a90ff 100644
--- a/perf_examples/self_pipe.c
+++ b/perf_examples/self_pipe.c
@@ -85,8 +85,9 @@ static void
 measure(void)
 {
        perf_event_desc_t *fds = NULL;
+       int num_fds = 0;
        uint64_t values[3];
-       int i, ret, num;
+       int i, ret;
        int pr[2], pw[2];
        ssize_t nbytes;
        pid_t pid;
@@ -109,11 +110,11 @@ measure(void)
        if (ret)
                err(1, "cannot create write pipe");
 
-       num = perf_setup_list_events(options.events, &fds);
-       if (num < 1)
+       ret = perf_setup_list_events(options.events, &fds, &num_fds);
+       if (ret || !num_fds)
                exit(1);
 
-       for(i=0; i < num; i++) {
+       for(i=0; i < num_fds; i++) {
                fds[i].hw.disabled = 1;
                fds[i].hw.read_format = PERF_FORMAT_SCALE;
 
@@ -141,7 +142,7 @@ measure(void)
                        err(1, "cannot create child\n");
                case 0:
                        /* do not inherit session fd */
-                       for(i=0; i < num; i++)
+                       for(i=0; i < num_fds; i++)
                                close(fds[i].fd);
                        /* pr[]: write master, read child */
                        /* pw[]: read master, write child */
@@ -170,7 +171,7 @@ measure(void)
 
        prctl(PR_TASK_PERF_EVENTS_DISABLE);
 
-       for(i=0; i < num; i++) {
+       for(i=0; i < num_fds; i++) {
                uint64_t val;
                double ratio;
 
@@ -204,7 +205,7 @@ measure(void)
        /*
         * and destroy our session
         */
-       for(i=0; i < num; i++)
+       for(i=0; i < num_fds; i++)
                close(fds[i].fd);
 
        free(fds);
diff --git a/perf_examples/self_smpl_multi.c b/perf_examples/self_smpl_multi.c
index 30d6acb..0ca45f5 100644
--- a/perf_examples/self_smpl_multi.c
+++ b/perf_examples/self_smpl_multi.c
@@ -77,7 +77,6 @@ static int program_time = PROGRAM_TIME;
 static int threshold = THRESHOLD;
 static int signum = SIGIO;
 static pthread_barrier_t barrier;
-static int num_events;
 
 
 static int buffer_pages = 1;
@@ -120,8 +119,9 @@ long bad_msg[MAX_THR];
 long bad_restart[MAX_THR];
 int fown;
 
-int __thread myid; /* TLS */
+static int __thread myid; /* TLS */
 static perf_event_desc_t __thread *fds; /* TLS */
+static int __thread num_fds; /* TLS */
 
 pid_t
 gettid(void)
@@ -277,8 +277,10 @@ overflow_start(char *name)
        size_t pgsz;
        int ret, fd, flags;
 
-       num_events = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES", &fds);
-       if (num_events != 1)
+       fds = NULL;
+       num_fds = 0;
+       ret = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES", &fds, 
&num_fds);
+       if (ret || !num_fds)
                errx(1, "cannot monitor event");
 
        pgsz = sysconf(_SC_PAGESIZE);
diff --git a/perf_examples/syst.c b/perf_examples/syst.c
index 429c6c4..a681e41 100644
--- a/perf_examples/syst.c
+++ b/perf_examples/syst.c
@@ -43,22 +43,21 @@ typedef struct {
 
 static options_t options;
 static perf_event_desc_t **all_fds;
-static int num;
+static int *num_fds;
 
 void
 setup_cpu(int cpu)
 {
-       perf_event_desc_t *fds = NULL;
-       int i;
+       perf_event_desc_t *fds;
+       int i, ret;
 
-       num = perf_setup_list_events(options.events, &fds);
-       if (num == -1)
+       ret = perf_setup_list_events(options.events, &all_fds[cpu], 
&num_fds[cpu]);
+       if (ret || (num_fds == 0))
                errx(1, "cannot setup events\n");
-
-       all_fds[cpu] = fds;
+       fds = all_fds[cpu]; /* temp */
 
        fds[0].fd = -1;
-       for(i=0; i < num; i++) {
+       for(i=0; i < num_fds[cpu]; i++) {
                fds[i].hw.disabled = options.group ? !i : 1;
 
                if (options.excl && ((options.group && !i) || (!options.group)))
@@ -92,10 +91,11 @@ measure(void)
                cmax = cmin + 1;
                ncpus = 1;
        }
-       all_fds = malloc(ncpus * sizeof(perf_event_desc_t));
-       if (!all_fds)
-               err(1, "cannot allocate memory for all_fds");
+       all_fds = calloc(ncpus, sizeof(perf_event_desc_t));
+       num_fds = calloc(ncpus, sizeof(int));
 
+       if (!all_fds || !num_fds)
+               err(1, "cannot allocate memory for internal structures");
        for(c=cmin ; c < cmax; c++)
                setup_cpu(c);
 
@@ -106,7 +106,7 @@ measure(void)
                fds = all_fds[c];
                if (options.group) 
                        ret = ioctl(fds[0].fd, PERF_EVENT_IOC_ENABLE, 0);
-               else for(i=0; i < num; i++) {
+               else for(i=0; i < num_fds[c]; i++) {
                        ret = ioctl(fds[i].fd, PERF_EVENT_IOC_ENABLE, 0);
                        if (ret)
                                err(1, "cannot enable event %s\n", fds[i].name);
@@ -121,7 +121,7 @@ measure(void)
                puts("------------------------");
                for(c = cmin; c < cmax; c++) {
                        fds = all_fds[c];
-                       for(i=0; i < num; i++) {
+                       for(i=0; i < num_fds[c]; i++) {
                                double ratio;
 
                                ret = read(fds[i].fd, values, sizeof(values));
@@ -152,7 +152,7 @@ measure(void)
        }
        for(c = cmin; c < cmax; c++) {
                fds = all_fds[c];
-               for(i=0; i < num; i++)
+               for(i=0; i < num_fds[c]; i++)
                        close(fds[i].fd);
        }
        free(all_fds);
diff --git a/perf_examples/task.c b/perf_examples/task.c
index b29822a..6c37b84 100644
--- a/perf_examples/task.c
+++ b/perf_examples/task.c
@@ -35,10 +35,13 @@
 
 #include "perf_util.h"
 
+#define MAX_GROUPS 16
+
 typedef struct {
-       const char *events;
+       const char *events[MAX_GROUPS];
+       int num_groups;
+       int format_group;
        int inherit;
-       int group;
        int print;
        int pin;
        pid_t pid;
@@ -59,11 +62,11 @@ child(char **arg)
 }
 
 static void
-read_group(perf_event_desc_t *fds, int num)
+read_groups(perf_event_desc_t *fds, int num)
 {
-       uint64_t *values;
-       size_t sz;
-       int i, ret;
+       uint64_t *values = NULL;
+       size_t new_sz, sz = 0;
+       int i, evt, ret;
 
        /*
         *      { u64           nr;
@@ -76,56 +79,55 @@ read_group(perf_event_desc_t *fds, int num)
         *
         * we do not use FORMAT_ID in this program
         */
-       sz = sizeof(uint64_t) * (3 + num);
-       values = malloc(sz);
-       if (!values)
-               err(1, "cannot allocate memory for values\n");
-
-       ret = read(fds[0].fd, values, sz);
-       if (ret != sz) { /* unsigned */
-               if (ret == -1)
-                       err(1, "cannot read values event %s", fds[0].name);
-               else    /* likely pinned and could not be loaded */
-                       warnx("could not read event0 ret=%d", ret);
-       }
 
-       /*
-        * propagate to save area
-        */
-       for(i=0; i < num; i++) {
-               values[0] = values[3+i];
-               /*
-                * scaling because we may be sharing the PMU and
-                * thus may be multiplexed
-                */
-               fds[i].prev_value = fds[i].value;
-               fds[i].value = perf_scale(values);
-               fds[i].enabled = values[1];
-               fds[i].running = values[2];
-       }
-       free(values);
-}
+       for (evt = 0; evt < num; ) {
+               int num_evts_to_read;
 
-static void
-read_single(perf_event_desc_t *fds, int num)
-{
-       uint64_t values[3];
-       int i, ret;
+               if (options.format_group) {
+                       num_evts_to_read = perf_get_group_nevents(fds, num, 
evt);
+                       new_sz = sizeof(uint64_t) * (3 + num_evts_to_read);
+               } else {
+                       num_evts_to_read = 1;
+                       new_sz = sizeof(uint64_t) * 3;
+               }
 
-       for(i=0; i < num; i++) {
+               if (new_sz > sz) {
+                       sz = new_sz;
+                       values = realloc(values, sz);
+               }
 
-               ret = read(fds[i].fd, values, sizeof(values));
-               if (ret != sizeof(values)) { /* unsigned */
+               if (!values)
+                       err(1, "cannot allocate memory for values\n");
+
+               ret = read(fds[evt].fd, values, sz);
+               if (ret != sz) { /* unsigned */
                        if (ret == -1)
-                               err(1, "cannot read values event %s", 
fds[i].name);
-                       else    /* likely pinned and could not be loaded */
-                               warnx("could not read event%d", i);
+                               err(1, "cannot read values event %s", 
fds[0].name);
+
+                       /* likely pinned and could not be loaded */
+                       warnx("could not read event %d, tried to read %d bytes, 
but got %d",
+                               evt, (int)sz, ret);
+               }
+
+               /*
+                * propagate to save area
+                */
+               for (i = evt; i < (evt + num_evts_to_read); i++) {
+                       if (options.format_group)
+                               values[0] = values[3 + (i - evt)];
+                       /*
+                        * scaling because we may be sharing the PMU and
+                        * thus may be multiplexed
+                        */
+                       fds[i].prev_value = fds[i].value;
+                       fds[i].value = perf_scale(values);
+                       fds[i].enabled = values[1];
+                       fds[i].running = values[2];
                }
-               fds[i].prev_value = fds[i].value;
-               fds[i].value = perf_scale(values);
-               fds[i].enabled = values[1];
-               fds[i].running = values[2];
+               evt += num_evts_to_read;
        }
+       if (values)
+               free(values);
 }
 
 static void
@@ -133,21 +135,21 @@ print_counts(perf_event_desc_t *fds, int num)
 {
        int i;
 
-       if (options.group)
-               read_group(fds, num);
-       else
-               read_single(fds, num);
+       read_groups(fds, num);
 
        for(i=0; i < num; i++) {
                double ratio;
                uint64_t val;
 
                val = fds[i].value - fds[i].prev_value;
-
                ratio = 0.0;
                if (fds[i].enabled)
                        ratio = 1.0 * fds[i].running / fds[i].enabled;
 
+               /* separate groups */
+               if (i && fds[i].hw.enable_on_exec)
+                       putchar('\n');
+
                if (ratio == 1.0)
                        printf("%'20"PRIu64" %s (%'"PRIu64" : %'"PRIu64")\n", 
val, fds[i].name, fds[i].enabled, fds[i].running);
                else
@@ -167,8 +169,8 @@ static void sig_handler(int n)
 int
 parent(char **arg)
 {
-       perf_event_desc_t *fds;
-       int status, ret, i, num;
+       perf_event_desc_t *fds = NULL;
+       int status, ret, i, num_fds = 0, grp, group_fd;
        int ready[2], go[2];
        char buf;
        pid_t pid;
@@ -176,9 +178,12 @@ parent(char **arg)
        if (pfm_initialize() != PFM_SUCCESS)
                errx(1, "libpfm initialization failed");
 
-       num = perf_setup_list_events(options.events, &fds);
-       if (num < 1)
-               exit(1);
+       for (grp = 0; grp < options.num_groups; grp++) {
+               int ret;
+               ret = perf_setup_list_events(options.events[grp], &fds, 
&num_fds);
+               if (ret || !num_fds)
+                       exit(1);
+       }
 
        pid = options.pid;
        if (!pid) {
@@ -232,37 +237,43 @@ parent(char **arg)
        }
 
        fds[0].fd = -1;
-       for(i=0; i < num; i++) {
+       for(i=0; i < num_fds; i++) {
+               int is_group_leader; /* boolean */
+
+               is_group_leader = perf_is_group_leader(fds, i);
+               if (is_group_leader) {
+                       /* this is the group leader */
+                       group_fd = -1;
+               } else {
+                       group_fd = fds[fds[i].group_leader].fd;
+               }
+
                /*
                 * create leader disabled with enable_on-exec
                 */
                if (!options.pid) {
-                       if (options.group) {
-                               fds[i].hw.disabled = !i;
-                               fds[i].hw.enable_on_exec = !i;
-                       } else {
-                               fds[i].hw.disabled = 1;
-                               fds[i].hw.enable_on_exec = 1;
-                       }
+                       fds[i].hw.disabled = is_group_leader;
+                       fds[i].hw.enable_on_exec = is_group_leader;
                }
 
                fds[i].hw.read_format = PERF_FORMAT_SCALE;
                /* request timing information necessary for scaling counts */
-               if (!i && options.group)
-                       fds[0].hw.read_format |= PERF_FORMAT_GROUP;
+               if (is_group_leader && options.format_group)
+                       fds[i].hw.read_format |= PERF_FORMAT_GROUP;
 
                if (options.inherit)
                        fds[i].hw.inherit = 1;
 
-               if (options.pin && ((options.group && i== 0) || 
(!options.group)))
+               if (options.pin && is_group_leader)
                        fds[i].hw.pinned = 1;
-
-               fds[i].fd = perf_event_open(&fds[i].hw, pid, -1, options.group 
? fds[0].fd : -1, 0);
+               fds[i].fd = perf_event_open(&fds[i].hw, pid, -1, group_fd, 0);
                if (fds[i].fd == -1) {
                        warn("cannot attach event%d %s", i, fds[i].name);
                        goto error;
                }
-       }       
+       }
+       ioctl(fds[0].fd, PERF_EVENT_IOC_DISABLE, 0);
+
 
        if (!options.pid)
                close(go[1]);
@@ -271,12 +282,12 @@ parent(char **arg)
                if (!options.pid) {
                        while(waitpid(pid, &status, WNOHANG) == 0) {
                                sleep(1);
-                               print_counts(fds, num);
+                               print_counts(fds, num_fds);
                        }
                } else {
                        while(quit == 0) {
                                sleep(1);
-                               print_counts(fds, num);
+                               print_counts(fds, num_fds);
                        }
                }
        } else {
@@ -284,10 +295,10 @@ parent(char **arg)
                        waitpid(pid, &status, 0);
                else
                        pause();
-               print_counts(fds, num);
+               print_counts(fds, num_fds);
        }
 
-       for(i=0; i < num; i++)
+       for(i=0; i < num_fds; i++)
                close(fds[i].fd);
 
        free(fds);
@@ -305,11 +316,11 @@ usage(void)
        printf("usage: task [-h] [-i] [-g] [-p] [-P] [-t pid] [-e 
event1,event2,...] cmd\n"
                "-h\t\tget help\n"
                "-i\t\tinherit across fork\n"
-               "-g\t\tgroup events\n"
+               "-f\t\tuse PERF_FORMAT_GROUP for reading up counts 
(experimental, not working)\n"
                "-p\t\tprint counts every second\n"
                "-P\t\tpin events\n"
                "-t pid\tmeasure existing pid\n"
-               "-e ev,ev\tlist of events to measure\n"
+               "-e ev,ev\tgroup of events to measure (multiple -e switches are 
allowed)\n"
                );
 }
 
@@ -320,13 +331,18 @@ main(int argc, char **argv)
 
        setlocale(LC_ALL, "");
 
-       while ((c=getopt(argc, argv,"he:igpPt:")) != -1) {
+       while ((c=getopt(argc, argv,"he:ifpPt:")) != -1) {
                switch(c) {
                        case 'e':
-                               options.events = optarg;
+                               if (options.num_groups < MAX_GROUPS) {
+                                       options.events[options.num_groups++] = optarg;
+                               } else {
+                                       errx(1, "you cannot specify more than %d groups.\n",
+                                               MAX_GROUPS);
+                               }
                                break;
-                       case 'g':
-                               options.group = 1;
+                       case 'f':
+                               options.format_group = 1;
                                break;
                        case 'p':
                                options.print = 1;
@@ -347,12 +363,13 @@ main(int argc, char **argv)
                                errx(1, "unknown error");
                }
        }
-       if (!options.events)
-               options.events = "PERF_COUNT_HW_CPU_CYCLES,PERF_COUNT_HW_INSTRUCTIONS";
-
+       if (options.num_groups == 0) {
+               options.events[0] = "PERF_COUNT_HW_CPU_CYCLES,PERF_COUNT_HW_INSTRUCTIONS";
+               options.num_groups = 1;
+       }
        if (!argv[optind] && !options.pid)
                errx(1, "you must specify a command to execute or a thread to attach to\n");
-       
+
        signal(SIGINT, sig_handler);
 
        return parent(argv+optind);
diff --git a/perf_examples/task_attach_timeout.c b/perf_examples/task_attach_timeout.c
index bfcb60d..a3bffb0 100644
--- a/perf_examples/task_attach_timeout.c
+++ b/perf_examples/task_attach_timeout.c
@@ -95,19 +95,19 @@ print_counts(perf_event_desc_t *fds, int num, int do_delta)
 int
 measure(pid_t pid)
 {
-       perf_event_desc_t *fds;
-       int i, num;
+       perf_event_desc_t *fds = NULL;
+       int i, ret, num_fds = 0;
        char fn[32];
 
        if (pfm_initialize() != PFM_SUCCESS)
                errx(1, "libpfm initialization failed\n");
 
-       num = perf_setup_list_events(options.events, &fds);
-       if (num < 1)
+       ret = perf_setup_list_events(options.events, &fds, &num_fds);
+       if (ret || (num_fds == 0))
                exit(1);
 
        fds[0].fd = -1;
-       for(i=0; i < num; i++) {
+       for(i=0; i < num_fds; i++) {
                fds[i].hw.disabled = 0; /* start immediately */
 
                /* request timing information necessary for scaling counts */
@@ -130,15 +130,15 @@ measure(pid_t pid)
                sleep(1);
                options.delay--;
                if (options.print)
-                       print_counts(fds, num, 1);
+                       print_counts(fds, num_fds, 1);
        }
        if (options.delay)
                warn("thread %d terminated before timeout", pid);
 
        if (!options.print)
-               print_counts(fds, num, 0);
+               print_counts(fds, num_fds, 0);
 
-       for(i=0; i < num; i++)
+       for(i=0; i < num_fds; i++)
                close(fds[i].fd);
 
        free(fds);
diff --git a/perf_examples/task_smpl.c b/perf_examples/task_smpl.c
index 7d029db..2ceeb01 100644
--- a/perf_examples/task_smpl.c
+++ b/perf_examples/task_smpl.c
@@ -58,7 +58,7 @@ typedef struct {
 static jmp_buf jbuf;
 static uint64_t collected_samples, lost_samples;
 static perf_event_desc_t *fds;
-static int num_events;
+static int num_fds;
 static options_t options;
 static uint64_t sum_period;
 
@@ -293,7 +293,7 @@ display_sample(perf_event_desc_t *hw, struct perf_event_header *ehdr)
 
                                sz -= sizeof(grp);
 
-                               e = perf_id2event(fds, num_events, grp.id);
+                               e = perf_id2event(fds, num_fds, grp.id);
                                if (e == -1)
                                        str = "unknown sample event";
                                else
@@ -395,7 +395,7 @@ display_lost(perf_event_desc_t *hw)
        if (ret)
                errx(1, "cannot read lost info");
 
-       e = perf_id2event(fds, num_events, lost.id);
+       e = perf_id2event(fds, num_fds, lost.id);
        if (e == -1)
                str = "unknown lost event";
        else
@@ -486,8 +486,8 @@ mainloop(char **arg)
        /*
         * does allocate fds
         */
-       num_events = perf_setup_list_events(options.events, &fds);
-       if (num_events == -1)
+       ret  = perf_setup_list_events(options.events, &fds, &num_fds);
+       if (ret || !num_fds)
                errx(1, "cannot setup event list");
 
        memset(pollfds, 0, sizeof(pollfds));
@@ -512,7 +512,7 @@ mainloop(char **arg)
                errx(1, "task %s [%d] exited already status %d\n", arg[0], pid, WEXITSTATUS(status));
 
        fds[0].fd = -1;
-       for(i=0; i < num_events; i++) {
+       for(i=0; i < num_fds; i++) {
 
                fds[i].hw.disabled = 0; /* start immediately */
 
@@ -538,7 +538,7 @@ mainloop(char **arg)
 
                        /* must get event id for SAMPLE_GROUP */
                        fds[i].hw.read_format = PERF_FORMAT_SCALE;
-                       if (num_events > 1)
+                       if (num_fds > 1)
                                fds[i].hw.read_format |= PERF_FORMAT_GROUP|PERF_FORMAT_ID;
                }
 
@@ -573,7 +573,7 @@ mainloop(char **arg)
         * We are skipping the first 3 values (nr, time_enabled, time_running)
         * and then for each event we get a pair of values.
         */
-       sz = (3+2*num_events)*sizeof(uint64_t);
+       sz = (3+2*num_fds)*sizeof(uint64_t);
        val = malloc(sz);
        if (!val)
                err(1, "cannot allocate memory");
@@ -583,7 +583,7 @@ mainloop(char **arg)
                err(1, "cannot read id %zu", sizeof(val));
 
 
-       for(i=0; i < num_events; i++) {
+       for(i=0; i < num_fds; i++) {
                fds[i].id = val[2*i+1+3];
                printf("%"PRIu64"  %s\n", fds[i].id, fds[i].name);
        }
@@ -617,7 +617,7 @@ terminate_session:
         */
        wait4(pid, &status, 0, NULL);
 
-       for(i=0; i < num_events; i++)
+       for(i=0; i < num_fds; i++)
                close(fds[i].fd);
 
        /* check for partial event buffer */
diff --git a/perf_examples/x86/bts_smpl.c b/perf_examples/x86/bts_smpl.c
index 46237bd..b28fd06 100644
--- a/perf_examples/x86/bts_smpl.c
+++ b/perf_examples/x86/bts_smpl.c
@@ -59,7 +59,7 @@ typedef struct {
 static jmp_buf jbuf;
 static uint64_t collected_samples, lost_samples;
 static perf_event_desc_t *fds;
-static int num_events;
+static int num_fds;
 static options_t options;
 
 static struct option the_options[]={
@@ -172,7 +172,7 @@ display_lost(perf_event_desc_t *hw)
        if (ret)
                errx(1, "cannot read lost info");
 
-       e = perf_id2event(fds, num_events, lost.id);
+       e = perf_id2event(fds, num_fds, lost.id);
        if (e == -1)
                str = "unknown lost event";
        else
@@ -239,8 +239,8 @@ mainloop(char **arg)
        /*
         * does allocate fds
         */
-       num_events = perf_setup_list_events("PERF_COUNT_HW_BRANCH_INSTRUCTIONS", &fds);
-       if (num_events == -1)
+       ret = perf_setup_list_events("PERF_COUNT_HW_BRANCH_INSTRUCTIONS", &fds, &num_fds);
+       if (ret || !num_fds)
                errx(1, "cannot setup event");
 
        memset(pollfds, 0, sizeof(pollfds));
diff --git a/perf_examples/x86/pebs_smpl.c b/perf_examples/x86/pebs_smpl.c
index 83ba098..f24aa44 100644
--- a/perf_examples/x86/pebs_smpl.c
+++ b/perf_examples/x86/pebs_smpl.c
@@ -54,7 +54,7 @@ typedef struct {
 static jmp_buf jbuf;
 static uint64_t collected_samples, lost_samples;
 static perf_event_desc_t *fds;
-static int num_events;
+static int num_fds;
 static options_t options;
 static uint64_t sum_period;
 
@@ -353,7 +353,7 @@ display_sample(perf_event_desc_t *hw, struct perf_event_header *ehdr)
 
                        sz -= sizeof(grp);
 
-                       e = perf_id2event(fds, num_events, grp.id);
+                       e = perf_id2event(fds, num_fds, grp.id);
                        if (e == -1)
                                str = "unknown sample event";
                        else
@@ -419,7 +419,7 @@ display_lost(perf_event_desc_t *hw)
        if (ret)
                errx(1, "cannot read lost info");
 
-       e = perf_id2event(fds, num_events, lost.id);
+       e = perf_id2event(fds, num_fds, lost.id);
        if (e == -1)
                str = "unknown lost event";
        else
@@ -510,8 +510,8 @@ mainloop(char **arg)
        /*
         * does allocate fds
         */
-       num_events = perf_setup_list_events(options.events, &fds);
-       if (num_events == -1)
+       ret = perf_setup_list_events(options.events, &fds, &num_fds);
+       if (ret || !num_fds)
                errx(1, "cannot setup event list");
 
        memset(pollfds, 0, sizeof(pollfds));
@@ -536,7 +536,7 @@ mainloop(char **arg)
                errx(1, "task %s [%d] exited already status %d\n", arg[0], pid, WEXITSTATUS(status));
 
        fds[0].fd = -1;
-       for(i=0; i < num_events; i++) {
+       for(i=0; i < num_fds; i++) {
 
                fds[i].hw.disabled = 0; /* start immediately */
 
@@ -595,7 +595,7 @@ mainloop(char **arg)
         * We are skipping the first 3 values (nr, time_enabled, time_running)
         * and then for each event we get a pair of values.
         */
-       sz = (3+2*num_events)*sizeof(uint64_t);
+       sz = (3+2*num_fds)*sizeof(uint64_t);
        val = malloc(sz);
        if (!val)
                err(1, "cannot allocated memory");
@@ -605,7 +605,7 @@ mainloop(char **arg)
                err(1, "cannot read id %zu", sizeof(val));
 
 
-       for(i=0; i < num_events; i++) {
+       for(i=0; i < num_fds; i++) {
                fds[i].id = val[2*i+1+3];
                printf("%"PRIu64"  %s\n", fds[i].id, fds[i].name);
        }
@@ -639,7 +639,7 @@ terminate_session:
         */
        wait4(pid, &status, 0, NULL);
 
-       for(i=0; i < num_events; i++)
+       for(i=0; i < num_fds; i++)
                close(fds[i].fd);
 
        /* check for partial event buffer */

------------------------------------------------------------------------------
_______________________________________________
perfmon2-devel mailing list
perfmon2-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/perfmon2-devel

Reply via email to