- Allow specifying multiple groups to perf_examples/task via multiple
  -e <evt>[,<evt>]... switches
- Remove -g option because it's now the default.  Every -e switch
  gets its own group.
- Add -f option to specify using the experimental PERF_FORMAT_GROUP
  option.  PERF_FORMAT_GROUP appears to be broken in the 2.6.32
  and 2.6.33 kernels when monitoring processes other than the current
  process.  I added this option instead of using #ifdefs so that
  the code stays live and to avoid maintenance issues down the road.

Because the default behavior is to allow multiple groups, there is no
longer a need for the "read_single()" function, so I have eliminated
it.

This change simplifies the group leader handling and unifies the
logic so that grouping is always used; only the size and
number of groups vary.

To achieve these goals, the prototypes of perf_setup_argv_events
and perf_setup_list_events needed to be changed to add num_fds
and max_fds as "in/out" parameters, so that the functions can be
called repeatedly, each call appending to the same descriptor array.

This change rippled through the other example programs, and to
make the code clearer, I renamed "num" and "num_events" to
"num_fds" and added "max_fds".  In some of the
examples, these needed to be thread-local storage, and in other cases
(syst.c) there is an array of them, indexed by cpu number.

Signed-off-by: Corey Ashford <cjash...@linux.vnet.ibm.com>
---
 perf_examples/notify_group.c        |   18 ++--
 perf_examples/notify_self.c         |   22 ++--
 perf_examples/perf_util.c           |   47 +++++----
 perf_examples/perf_util.h           |    5 +-
 perf_examples/self.c                |   12 +-
 perf_examples/self_count.c          |   14 ++--
 perf_examples/self_pipe.c           |   15 ++--
 perf_examples/self_smpl_multi.c     |   10 ++-
 perf_examples/syst.c                |   29 +++---
 perf_examples/task.c                |  197 ++++++++++++++++++++---------------
 perf_examples/task_attach_timeout.c |   16 ++--
 perf_examples/task_smpl.c           |   22 ++--
 12 files changed, 221 insertions(+), 186 deletions(-)

diff --git a/perf_examples/notify_group.c b/perf_examples/notify_group.c
index 9192df2..95907fa 100644
--- a/perf_examples/notify_group.c
+++ b/perf_examples/notify_group.c
@@ -43,8 +43,8 @@ typedef struct {
 
 static volatile unsigned long notification_received;
 
-static perf_event_desc_t *fds;
-static int num_events;
+static perf_event_desc_t *fds = NULL;
+static int num_fds = 0, max_fds = 0;
 
 static int buffer_pages = 1; /* size of buffer payload  (must be power of 2) */
 
@@ -56,7 +56,7 @@ sigio_handler(int n, struct siginfo *info, struct sigcontext 
*sc)
        uint64_t ip;
        int id, ret;
        
-       id = perf_fd2event(fds, num_events, info->si_fd);
+       id = perf_fd2event(fds, num_fds, info->si_fd);
        if (id == -1)
                errx(1, "cannot find event for descriptor %d", info->si_fd);
 
@@ -129,15 +129,15 @@ main(int argc, char **argv)
        /*
         * allocates fd for us
         */
-       num_events = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES,"
+       ret = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES,"
                                       "PERF_COUNT_HW_CPU_CYCLES,"
                                        "PERF_COUNT_HW_CPU_CYCLES",
-                                       &fds);
-       if (num_events < 1)
+                                       &fds, &num_fds, &max_fds);
+       if (ret || (num_fds == 0))
                exit(1);
 
        fds[0].fd = -1;
-       for(i=0; i < num_events; i++) {
+       for(i=0; i < num_fds; i++) {
                /* want a notification for each sample added to the buffer */
                fds[i].hw.disabled =  !!i;
                printf("i=%d disabled=%d\n", i, fds[i].hw.disabled);
@@ -180,7 +180,7 @@ main(int argc, char **argv)
                fds[i].pgmsk = (buffer_pages * pgsz) - 1;
        }
 
-       for(i=0; i < num_events; i++) {
+       for(i=0; i < num_fds; i++) {
                ret = ioctl(fds[i].fd, PERF_EVENT_IOC_REFRESH , 1);
                if (ret == -1)
                        err(1, "cannot refresh");
@@ -194,7 +194,7 @@ error:
        /*
         * destroy our session
         */
-       for(i=0; i < num_events; i++)
+       for(i=0; i < num_fds; i++)
                close(fds[i].fd);
 
        free(fds);
diff --git a/perf_examples/notify_self.c b/perf_examples/notify_self.c
index 99074a5..cf96c74 100644
--- a/perf_examples/notify_self.c
+++ b/perf_examples/notify_self.c
@@ -40,8 +40,8 @@
 
 static volatile unsigned long notification_received;
 
-static perf_event_desc_t *fds;
-static int num_events;
+static perf_event_desc_t *fds = NULL;
+static int num_fds = 0, max_fds = 0;
 
 static int buffer_pages = 1; /* size of buffer payload (must be power of 2)*/
 
@@ -79,7 +79,7 @@ print_sample(int id)
                if (ret)
                        errx(1, "cannot read grp");
 
-               e = perf_id2event(fds, num_events, grp.id);
+               e = perf_id2event(fds, num_fds, grp.id);
                if (e == -1)
                        str = "unknown event";
                else
@@ -119,7 +119,7 @@ sigio_handler(int n, struct siginfo *info, void *uc)
        if (info->si_code != POLL_HUP)
                errx(1, "signal not generated by SIGIO");
 
-       id = perf_fd2event(fds, num_events, info->si_fd);
+       id = perf_fd2event(fds, num_fds, info->si_fd);
        if (id == -1)
                errx(1, "no event associated with fd=%d", info->si_fd);
 
@@ -188,14 +188,14 @@ main(int argc, char **argv)
        /*
         * allocates fd for us
         */
-       num_events = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES,"
+       ret = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES,"
                                       "PERF_COUNT_HW_INSTRUCTIONS",
-                                       &fds);
-       if (num_events < 1)
+                                       &fds, &num_fds, &max_fds);
+       if (ret || (num_fds == 0))
                exit(1);
 
        fds[0].fd = -1;
-       for(i=0; i < num_events; i++) {
+       for(i=0; i < num_fds; i++) {
 
                /* want a notification for every each added to the buffer */
                fds[i].hw.disabled = !i;
@@ -213,7 +213,7 @@ main(int argc, char **argv)
                        err(1, "cannot attach event %s", fds[i].name);
        }
        
-       sz = (3+2*num_events)*sizeof(uint64_t);
+       sz = (3+2*num_fds)*sizeof(uint64_t);
        val = malloc(sz);
        if (!val)
                err(1, "cannot allocated memory");
@@ -245,7 +245,7 @@ main(int argc, char **argv)
         * We are skipping the first 3 values (nr, time_enabled, time_running)
         * and then for each event we get a pair of values.
         */ 
-       for(i=0; i < num_events; i++) {
+       for(i=0; i < num_fds; i++) {
                fds[i].id = val[2*i+1+3];
                printf("%"PRIu64"  %s\n", fds[i].id, fds[i].name);
        }
@@ -295,7 +295,7 @@ main(int argc, char **argv)
        /*
         * destroy our session
         */
-       for(i=0; i < num_events; i++)
+       for(i=0; i < num_fds; i++)
                close(fds[i].fd);
 
        free(fds);
diff --git a/perf_examples/perf_util.c b/perf_examples/perf_util.c
index 4de21a8..d680ef0 100644
--- a/perf_examples/perf_util.c
+++ b/perf_examples/perf_util.c
@@ -31,65 +31,68 @@
 #include <perfmon/pfmlib_perf_event.h>
 #include "perf_util.h"
 
+/* the **fd parameter must point to a null pointer on the first call
+ * max_fds and num_fds must both point to a zero value on the first call
+ * The return value is success (0) vs. failure (non-zero) */
 int
-perf_setup_argv_events(char **argv, perf_event_desc_t **fd)
+perf_setup_argv_events(char **argv, perf_event_desc_t **fd, int *num_fds, int 
*max_fds)
 {
-       perf_event_desc_t *fdt = NULL;
-       int num = 0, max_fd = 0, new_max;
+       int new_max;
        int ret;
+       const int group_leader = *num_fds;
 
-       if (!(argv && fd))
+       if (!(argv && fd && num_fds && max_fds))
                return -1;
 
        
        while(*argv) {
-               if (num == max_fd) {
+               if (*num_fds == *max_fds) {
 
-                       if (!max_fd)
+                       if (*max_fds == 0)
                                new_max = 2;
                        else
-                               new_max = max_fd << 1;
+                               new_max = *max_fds << 1;
 
-                       if (new_max < max_fd) {
+                       if (new_max < *max_fds) {
                                warn("too many entries");
                                goto error;
                        }
-                       fdt = realloc(fdt, new_max * sizeof(*fdt));
-                       if (!fdt) {
+                       *fd = realloc(*fd, new_max * sizeof(perf_event_desc_t));
+                       if (!*fd) {
                                warn("cannot allocate memory");
                                goto error;
                        }
                        /* reset newly allocated chunk */
-                       memset(fdt+max_fd, 0, (new_max - max_fd) * 
sizeof(*fdt));
-                       max_fd = new_max;
+                       memset(*fd + *max_fds, 0, (new_max - *max_fds) * 
sizeof(perf_event_desc_t));
+                       *max_fds = new_max;
                }
 
-               ret = pfm_get_perf_event_encoding(*argv, PFM_PLM3, 
&fdt[num].hw, NULL, NULL);
+               ret = pfm_get_perf_event_encoding(*argv, PFM_PLM3, 
&(*fd)[*num_fds].hw, NULL, NULL);
                if (ret != PFM_SUCCESS) {
                        warnx("event %s: %s\n", *argv, pfm_strerror(ret));
                        goto error;
                }
                /* ABI compatibility */
-               fdt[num].hw.size = sizeof(struct perf_event_attr);
-               fdt[num].name = *argv;
-               num++;
+               (*fd)[*num_fds].hw.size = sizeof(struct perf_event_attr);
+               (*fd)[*num_fds].name = *argv;
+               (*fd)[*num_fds].group_leader = group_leader;
+               (*num_fds)++;
                argv++;
        }
-       *fd = fdt;
-       return num;
+       return 0;
 error:
-       free(fdt);
+       free(*fd);
        return -1;
 }
 
 int
-perf_setup_list_events(const char *ev, perf_event_desc_t **fd)
+perf_setup_list_events(const char *ev, perf_event_desc_t **fd, int *num_fds, 
int *max_fds)
 {
        char **argv, *p, *q;
        char *events;
        int num = 0, i, ret;
 
-       if (!(ev && fd))
+       if (!(ev && fd && num_fds && max_fds))
                return -1;
 
        
@@ -118,7 +121,7 @@ perf_setup_list_events(const char *ev, perf_event_desc_t 
**fd)
        }
        argv[i++] = q;
        argv[i] = NULL;
-       ret = perf_setup_argv_events(argv, fd);
+       ret = perf_setup_argv_events(argv, fd, num_fds, max_fds);
        free(argv);
        return ret;
 }
diff --git a/perf_examples/perf_util.h b/perf_examples/perf_util.h
index ae5449a..3e4fe75 100644
--- a/perf_examples/perf_util.h
+++ b/perf_examples/perf_util.h
@@ -36,14 +36,15 @@ typedef struct {
        uint64_t id; /* event id kernel */
        void *buf;
        size_t pgmsk;
+       int group_leader;
        int fd;
 } perf_event_desc_t;
 
 /* handy shortcut */
 #define PERF_FORMAT_SCALE 
(PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING)
 
-extern int perf_setup_argv_events(char **argv, perf_event_desc_t **fd);
-extern int perf_setup_list_events(const char *events, perf_event_desc_t **fd);
+extern int perf_setup_argv_events(char **argv, perf_event_desc_t **fd, int 
*num_fds, int *max_fds);
+extern int perf_setup_list_events(const char *events, perf_event_desc_t **fd, 
int *num_fds, int *max_fds);
 extern int perf_read_buffer(struct perf_event_mmap_page *hdr, size_t pgmsk, 
void *buf, size_t sz);
 extern void perf_skip_buffer(struct perf_event_mmap_page *hdr, size_t sz);
 
diff --git a/perf_examples/self.c b/perf_examples/self.c
index 67c5e80..1cea786 100644
--- a/perf_examples/self.c
+++ b/perf_examples/self.c
@@ -65,9 +65,9 @@ noploop(void)
 int
 main(int argc, char **argv)
 {
-       perf_event_desc_t *fds;
+       perf_event_desc_t *fds = NULL;
        uint64_t values[3];
-       int i, ret, num;
+       int i, ret, num_fds = 0, max_fds = 0;
 
        setlocale(LC_ALL, "");
        /*
@@ -77,12 +77,12 @@ main(int argc, char **argv)
        if (ret != PFM_SUCCESS)
                errx(1, "Cannot initialize library: %s", pfm_strerror(ret));
 
-       num = perf_setup_argv_events(argc > 1 ? argv+1 : gen_events, &fds);
-       if (num == -1)
+       num_fds = perf_setup_argv_events(argc > 1 ? argv+1 : gen_events, &fds, 
&num_fds, &max_fds);
+       if (ret || num_fds == 0)
                errx(1, "cannot setup events");
 
        fds[0].fd = -1;
-       for(i=0; i < num; i++) {
+       for(i=0; i < num_fds; i++) {
                /* request timing information necessary for scaling */
                fds[i].hw.read_format = PERF_FORMAT_SCALE;
 
@@ -121,7 +121,7 @@ main(int argc, char **argv)
         */
        memset(values, 0, sizeof(values));
 
-       for (i=0; i < num; i++) {
+       for (i=0; i < num_fds; i++) {
                uint64_t val;
                double ratio;
 
diff --git a/perf_examples/self_count.c b/perf_examples/self_count.c
index 306b372..d334e24 100644
--- a/perf_examples/self_count.c
+++ b/perf_examples/self_count.c
@@ -179,10 +179,10 @@ read_count(perf_event_desc_t *fds)
 int
 main(int argc, char **argv)
 {
-       perf_event_desc_t *fds;
+       perf_event_desc_t *fds = NULL;
        size_t pgsz;
        uint64_t val;
-       int i, ret, num;
+       int i, ret, num_fds = 0, max_fds = 0;
        int n = 30;
 
        pgsz = sysconf(_SC_PAGESIZE);
@@ -193,12 +193,12 @@ main(int argc, char **argv)
        if (ret != PFM_SUCCESS)
                errx(1, "Cannot initialize library: %s", pfm_strerror(ret));
 
-       num = perf_setup_argv_events(argc > 1 ? argv+1 : gen_events, &fds);
-       if (num == -1)
+       ret = perf_setup_argv_events(argc > 1 ? argv+1 : gen_events, &fds, 
&num_fds, &max_fds);
+       if (ret || (num_fds == -1))
                errx(1, "cannot setup events");
 
        fds[0].fd = -1;
-       for(i=0; i < num; i++) {
+       for(i=0; i < num_fds; i++) {
                /* request timing information necesaary for scaling */
                fds[i].hw.read_format = PERF_FORMAT_SCALE;
                fds[i].hw.disabled = 0;
@@ -223,7 +223,7 @@ main(int argc, char **argv)
 
        for(;quit == 0;) {
                
-               for (i=0; i < num; i++) {
+               for (i=0; i < num_fds; i++) {
                        val = read_count(&fds[i]);
                        printf("%20"PRIu64" %s\n", val, fds[i].name);
                }
@@ -237,7 +237,7 @@ main(int argc, char **argv)
         */
        ioctl(fds[0].fd, PERF_EVENT_IOC_DISABLE, 0);
 
-       for (i=0; i < num; i++) {
+       for (i=0; i < num_fds; i++) {
                munmap(fds[i].buf, pgsz);
                close(fds[i].fd);
        }
diff --git a/perf_examples/self_pipe.c b/perf_examples/self_pipe.c
index 8fb919a..f103d70 100644
--- a/perf_examples/self_pipe.c
+++ b/perf_examples/self_pipe.c
@@ -85,8 +85,9 @@ static void
 measure(void)
 {
        perf_event_desc_t *fds = NULL;
+       int num_fds = 0, max_fds = 0;
        uint64_t values[3];
-       int i, ret, num;
+       int i, ret;
        int pr[2], pw[2];
        ssize_t nbytes;
        pid_t pid;
@@ -109,11 +110,11 @@ measure(void)
        if (ret)
                err(1, "cannot create write pipe");
 
-       num = perf_setup_list_events(options.events, &fds);
-       if (num < 1)
+       ret = perf_setup_list_events(options.events, &fds, &num_fds, &max_fds);
+       if (ret || (num_fds == 0))
                exit(1);
 
-       for(i=0; i < num; i++) {
+       for(i=0; i < num_fds; i++) {
                fds[i].hw.disabled = 1;
                fds[i].hw.read_format = PERF_FORMAT_SCALE;
 
@@ -141,7 +142,7 @@ measure(void)
                        err(1, "cannot create child\n");
                case 0:
                        /* do not inherit session fd */
-                       for(i=0; i < num; i++)
+                       for(i=0; i < num_fds; i++)
                                close(fds[i].fd);
                        /* pr[]: write master, read child */
                        /* pw[]: read master, write child */
@@ -170,7 +171,7 @@ measure(void)
 
        prctl(PR_TASK_PERF_EVENTS_DISABLE);
 
-       for(i=0; i < num; i++) {
+       for(i=0; i < num_fds; i++) {
                uint64_t val;
                double ratio;
 
@@ -204,7 +205,7 @@ measure(void)
        /*
         * and destroy our session
         */
-       for(i=0; i < num; i++)
+       for(i=0; i < num_fds; i++)
                close(fds[i].fd);
 
        free(fds);
diff --git a/perf_examples/self_smpl_multi.c b/perf_examples/self_smpl_multi.c
index 30d6acb..db64cb7 100644
--- a/perf_examples/self_smpl_multi.c
+++ b/perf_examples/self_smpl_multi.c
@@ -77,7 +77,6 @@ static int program_time = PROGRAM_TIME;
 static int threshold = THRESHOLD;
 static int signum = SIGIO;
 static pthread_barrier_t barrier;
-static int num_events;
 
 
 static int buffer_pages = 1;
@@ -122,6 +121,8 @@ int fown;
 
 int __thread myid; /* TLS */
 static perf_event_desc_t __thread *fds; /* TLS */
+static int __thread num_fds; /* TLS */
+static int __thread max_fds; /* TLS */
 
 pid_t
 gettid(void)
@@ -277,8 +278,11 @@ overflow_start(char *name)
        size_t pgsz;
        int ret, fd, flags;
 
-       num_events = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES", &fds);
-       if (num_events != 1)
+       fds = NULL;
+       num_fds = 0;
+       max_fds = 0;
+       ret = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES", &fds, 
&num_fds, &max_fds);
+       if (ret || (num_fds == 0))
                errx(1, "cannot monitor event");
 
        pgsz = sysconf(_SC_PAGESIZE);
diff --git a/perf_examples/syst.c b/perf_examples/syst.c
index 429c6c4..879932c 100644
--- a/perf_examples/syst.c
+++ b/perf_examples/syst.c
@@ -43,22 +43,21 @@ typedef struct {
 
 static options_t options;
 static perf_event_desc_t **all_fds;
-static int num;
+static int *num_fds, *max_fds;
 
 void
 setup_cpu(int cpu)
 {
-       perf_event_desc_t *fds = NULL;
-       int i;
+       perf_event_desc_t *fds;
+       int i, ret;
 
-       num = perf_setup_list_events(options.events, &fds);
-       if (num == -1)
+       ret = perf_setup_list_events(options.events, &all_fds[cpu], 
&num_fds[cpu], &max_fds[cpu]);
+       if (ret || (num_fds == 0))
                errx(1, "cannot setup events\n");
-
-       all_fds[cpu] = fds;
+       fds = all_fds[cpu]; /* temp */
 
        fds[0].fd = -1;
-       for(i=0; i < num; i++) {
+       for(i=0; i < num_fds[cpu]; i++) {
                fds[i].hw.disabled = options.group ? !i : 1;
 
                if (options.excl && ((options.group && !i) || (!options.group)))
@@ -92,10 +91,12 @@ measure(void)
                cmax = cmin + 1;
                ncpus = 1;
        }
-       all_fds = malloc(ncpus * sizeof(perf_event_desc_t));
-       if (!all_fds)
-               err(1, "cannot allocate memory for all_fds");
+       all_fds = calloc(ncpus, sizeof(perf_event_desc_t));
+       num_fds = calloc(ncpus, sizeof(int));
+       max_fds = calloc(ncpus, sizeof(int));
 
+       if (!all_fds || !num_fds || !max_fds)
+               err(1, "cannot allocate memory for internal structures");
        for(c=cmin ; c < cmax; c++)
                setup_cpu(c);
 
@@ -106,7 +107,7 @@ measure(void)
                fds = all_fds[c];
                if (options.group) 
                        ret = ioctl(fds[0].fd, PERF_EVENT_IOC_ENABLE, 0);
-               else for(i=0; i < num; i++) {
+               else for(i=0; i < num_fds[c]; i++) {
                        ret = ioctl(fds[i].fd, PERF_EVENT_IOC_ENABLE, 0);
                        if (ret)
                                err(1, "cannot enable event %s\n", fds[i].name);
@@ -121,7 +122,7 @@ measure(void)
                puts("------------------------");
                for(c = cmin; c < cmax; c++) {
                        fds = all_fds[c];
-                       for(i=0; i < num; i++) {
+                       for(i=0; i < num_fds[c]; i++) {
                                double ratio;
 
                                ret = read(fds[i].fd, values, sizeof(values));
@@ -152,7 +153,7 @@ measure(void)
        }
        for(c = cmin; c < cmax; c++) {
                fds = all_fds[c];
-               for(i=0; i < num; i++)
+               for(i=0; i < num_fds[c]; i++)
                        close(fds[i].fd);
        }
        free(all_fds);
diff --git a/perf_examples/task.c b/perf_examples/task.c
index b29822a..5531eed 100644
--- a/perf_examples/task.c
+++ b/perf_examples/task.c
@@ -35,10 +35,13 @@
 
 #include "perf_util.h"
 
+#define MAX_GROUPS 16
+
 typedef struct {
-       const char *events;
+       const char *events[MAX_GROUPS];
+       int num_groups;
+       int format_group;
        int inherit;
-       int group;
        int print;
        int pin;
        pid_t pid;
@@ -58,12 +61,29 @@ child(char **arg)
        /* not reached */
 }
 
+static int
+get_num_evts_in_group(perf_event_desc_t *fds, int evt, int num)
+{
+       int last_evt;
+
+       for (last_evt = evt + 1; last_evt < num; last_evt++) {
+               if (fds[last_evt].group_leader == last_evt) {
+                       /* This is a new group leader, so the previous
+                        * event was the final event of the preceding
+                        * group.
+                        */
+                       return last_evt - evt;
+               }
+       }
+       return last_evt - evt;
+}
+
 static void
-read_group(perf_event_desc_t *fds, int num)
+read_groups(perf_event_desc_t *fds, int num)
 {
        uint64_t *values;
        size_t sz;
-       int i, ret;
+       int i, evt, ret;
 
        /*
         *      { u64           nr;
@@ -76,55 +96,48 @@ read_group(perf_event_desc_t *fds, int num)
         *
         * we do not use FORMAT_ID in this program
         */
-       sz = sizeof(uint64_t) * (3 + num);
-       values = malloc(sz);
-       if (!values)
-               err(1, "cannot allocate memory for values\n");
-
-       ret = read(fds[0].fd, values, sz);
-       if (ret != sz) { /* unsigned */
-               if (ret == -1)
-                       err(1, "cannot read values event %s", fds[0].name);
-               else    /* likely pinned and could not be loaded */
-                       warnx("could not read event0 ret=%d", ret);
-       }
-
-       /*
-        * propagate to save area
-        */
-       for(i=0; i < num; i++) {
-               values[0] = values[3+i];
-               /*
-                * scaling because we may be sharing the PMU and
-                * thus may be multiplexed
-                */
-               fds[i].prev_value = fds[i].value;
-               fds[i].value = perf_scale(values);
-               fds[i].enabled = values[1];
-               fds[i].running = values[2];
-       }
-       free(values);
-}
 
-static void
-read_single(perf_event_desc_t *fds, int num)
-{
-       uint64_t values[3];
-       int i, ret;
+       for (evt = 0; evt < num; ) {
+               int num_evts_to_read;
 
-       for(i=0; i < num; i++) {
-
-               ret = read(fds[i].fd, values, sizeof(values));
-               if (ret != sizeof(values)) { /* unsigned */
+               if (options.format_group) {
+                       num_evts_to_read = get_num_evts_in_group(fds, evt, num);
+                       sz = sizeof(uint64_t) * (3 + num_evts_to_read);
+               } else {
+                       num_evts_to_read = 1;
+                       sz = sizeof(uint64_t) * 3;
+               }
+               values = malloc(sz);
+               if (!values)
+                       err(1, "cannot allocate memory for values\n");
+               ret = read(fds[evt].fd, values, sz);
+               if (ret != sz) { /* unsigned */
                        if (ret == -1)
-                               err(1, "cannot read values event %s", 
fds[i].name);
-                       else    /* likely pinned and could not be loaded */
-                               warnx("could not read event%d", i);
+                               err(1, "cannot read values event %s",
+                                               fds[0].name);
+                       else
+                               /* likely pinned and could not be loaded */
+                               warnx("could not read event %d, tried to read 
%d bytes, but got %d",
+                                       evt, (int)sz, ret);
                }
-               fds[i].prev_value = fds[i].value;
-               fds[i].value = perf_scale(values);
-               fds[i].enabled = values[1];
-               fds[i].running = values[2];
+
+               /*
+                * propagate to save area
+                */
+               for (i = evt; i < (evt + num_evts_to_read); i++) {
+                       if (options.format_group)
+                               values[0] = values[3 + (i - evt)];
+                       /*
+                        * scaling because we may be sharing the PMU and
+                        * thus may be multiplexed
+                        */
+                       fds[i].prev_value = fds[i].value;
+                       fds[i].value = perf_scale(values);
+                       fds[i].enabled = values[1];
+                       fds[i].running = values[2];
+               }
+               free(values);
+               evt += num_evts_to_read;
        }
 }
 
@@ -133,17 +146,13 @@ print_counts(perf_event_desc_t *fds, int num)
 {
        int i;
 
-       if (options.group)
-               read_group(fds, num);
-       else
-               read_single(fds, num);
+       read_groups(fds, num);
 
        for(i=0; i < num; i++) {
                double ratio;
                uint64_t val;
 
                val = fds[i].value - fds[i].prev_value;
-
                ratio = 0.0;
                if (fds[i].enabled)
                        ratio = 1.0 * fds[i].running / fds[i].enabled;
@@ -167,8 +176,8 @@ static void sig_handler(int n)
 int
 parent(char **arg)
 {
-       perf_event_desc_t *fds;
-       int status, ret, i, num;
+       perf_event_desc_t *fds = NULL;
+       int status, ret, i, num_fds = 0, max_fds = 0, grp, group_fd;
        int ready[2], go[2];
        char buf;
        pid_t pid;
@@ -176,9 +185,12 @@ parent(char **arg)
        if (pfm_initialize() != PFM_SUCCESS)
                errx(1, "libpfm initialization failed");
 
-       num = perf_setup_list_events(options.events, &fds);
-       if (num < 1)
-               exit(1);
+       for (grp = 0; grp < options.num_groups; grp++) {
+               int ret, old_num_fds = num_fds;
+               ret = perf_setup_list_events(options.events[grp], &fds, 
&num_fds, &max_fds);
+               if (ret || (num_fds == old_num_fds))
+                       exit(1);
+       }
 
        pid = options.pid;
        if (!pid) {
@@ -232,37 +244,44 @@ parent(char **arg)
        }
 
        fds[0].fd = -1;
-       for(i=0; i < num; i++) {
+       for(i=0; i < num_fds; i++) {
+               int is_group_leader; /* boolean */
+
+               if (fds[i].group_leader == i) {
+                       /* this is the group leader */
+                       group_fd = -1;
+                       is_group_leader = 1;
+               } else {
+                       group_fd = fds[fds[i].group_leader].fd;
+                       is_group_leader = 0;
+               }
+
                /*
                 * create leader disabled with enable_on-exec
                 */
                if (!options.pid) {
-                       if (options.group) {
-                               fds[i].hw.disabled = !i;
-                               fds[i].hw.enable_on_exec = !i;
-                       } else {
-                               fds[i].hw.disabled = 1;
-                               fds[i].hw.enable_on_exec = 1;
-                       }
+                       fds[i].hw.disabled = is_group_leader;
+                       fds[i].hw.enable_on_exec = is_group_leader;
                }
 
                fds[i].hw.read_format = PERF_FORMAT_SCALE;
                /* request timing information necessary for scaling counts */
-               if (!i && options.group)
-                       fds[0].hw.read_format |= PERF_FORMAT_GROUP;
+               if (is_group_leader && options.format_group)
+                       fds[i].hw.read_format |= PERF_FORMAT_GROUP;
 
                if (options.inherit)
                        fds[i].hw.inherit = 1;
 
-               if (options.pin && ((options.group && i== 0) || 
(!options.group)))
+               if (options.pin && is_group_leader)
                        fds[i].hw.pinned = 1;
-
-               fds[i].fd = perf_event_open(&fds[i].hw, pid, -1, options.group 
? fds[0].fd : -1, 0);
+               fds[i].fd = perf_event_open(&fds[i].hw, pid, -1, group_fd, 0);
                if (fds[i].fd == -1) {
                        warn("cannot attach event%d %s", i, fds[i].name);
                        goto error;
                }
-       }       
+       }
+       ioctl(fds[0].fd, PERF_EVENT_IOC_DISABLE, 0);
+
 
        if (!options.pid)
                close(go[1]);
@@ -271,12 +290,12 @@ parent(char **arg)
                if (!options.pid) {
                        while(waitpid(pid, &status, WNOHANG) == 0) {
                                sleep(1);
-                               print_counts(fds, num);
+                               print_counts(fds, num_fds);
                        }
                } else {
                        while(quit == 0) {
                                sleep(1);
-                               print_counts(fds, num);
+                               print_counts(fds, num_fds);
                        }
                }
        } else {
@@ -284,10 +303,10 @@ parent(char **arg)
                        waitpid(pid, &status, 0);
                else
                        pause();
-               print_counts(fds, num);
+               print_counts(fds, num_fds);
        }
 
-       for(i=0; i < num; i++)
+       for(i=0; i < num_fds; i++)
                close(fds[i].fd);
 
        free(fds);
@@ -305,11 +324,11 @@ usage(void)
        printf("usage: task [-h] [-i] [-g] [-p] [-P] [-t pid] [-e 
event1,event2,...] cmd\n"
                "-h\t\tget help\n"
                "-i\t\tinherit across fork\n"
-               "-g\t\tgroup events\n"
+               "-f\t\tuse PERF_FORMAT_GROUP for reading up counts 
(experimental, not working)\n"
                "-p\t\tprint counts every second\n"
                "-P\t\tpin events\n"
                "-t pid\tmeasure existing pid\n"
-               "-e ev,ev\tlist of events to measure\n"
+               "-e ev,ev\tgroup of events to measure (multiple -e switches are 
allowed)\n"
                );
 }
 
@@ -320,13 +339,18 @@ main(int argc, char **argv)
 
        setlocale(LC_ALL, "");
 
-       while ((c=getopt(argc, argv,"he:igpPt:")) != -1) {
+       while ((c=getopt(argc, argv,"he:ifpPt:")) != -1) {
                switch(c) {
                        case 'e':
-                               options.events = optarg;
+                               if (options.num_groups < MAX_GROUPS) {
+                                       options.events[options.num_groups++] = 
optarg;
+                               } else {
+                                       errx(1, "you cannot specify more than 
%d groups.\n",
+                                               MAX_GROUPS);
+                               }
                                break;
-                       case 'g':
-                               options.group = 1;
+                       case 'f':
+                               options.format_group = 1;
                                break;
                        case 'p':
                                options.print = 1;
@@ -347,9 +371,10 @@ main(int argc, char **argv)
                                errx(1, "unknown error");
                }
        }
-       if (!options.events)
-               options.events = "PERF_COUNT_HW_CPU_CYCLES,PERF_COUNT_HW_INSTRUCTIONS";
-
+       if (options.num_groups == 0) {
+               options.events[0] = "PERF_COUNT_HW_CPU_CYCLES,PERF_COUNT_HW_INSTRUCTIONS";
+               options.num_groups = 1;
+       }
        if (!argv[optind] && !options.pid)
                errx(1, "you must specify a command to execute or a thread to attach to\n");
        
diff --git a/perf_examples/task_attach_timeout.c b/perf_examples/task_attach_timeout.c
index bfcb60d..18f0a3b 100644
--- a/perf_examples/task_attach_timeout.c
+++ b/perf_examples/task_attach_timeout.c
@@ -95,19 +95,19 @@ print_counts(perf_event_desc_t *fds, int num, int do_delta)
 int
 measure(pid_t pid)
 {
-       perf_event_desc_t *fds;
-       int i, num;
+       perf_event_desc_t *fds = NULL;
+       int i, ret, num_fds = 0, max_fds = 0;
        char fn[32];
 
        if (pfm_initialize() != PFM_SUCCESS)
                errx(1, "libpfm initialization failed\n");
 
-       num = perf_setup_list_events(options.events, &fds);
-       if (num < 1)
+       ret = perf_setup_list_events(options.events, &fds, &num_fds, &max_fds);
+       if (ret || (num_fds == 0))
                exit(1);
 
        fds[0].fd = -1;
-       for(i=0; i < num; i++) {
+       for(i=0; i < num_fds; i++) {
                fds[i].hw.disabled = 0; /* start immediately */
 
                /* request timing information necessary for scaling counts */
@@ -130,15 +130,15 @@ measure(pid_t pid)
                sleep(1);
                options.delay--;
                if (options.print)
-                       print_counts(fds, num, 1);
+                       print_counts(fds, num_fds, 1);
        }
        if (options.delay)
                warn("thread %d terminated before timeout", pid);
 
        if (!options.print)
-               print_counts(fds, num, 0);
+               print_counts(fds, num_fds, 0);
 
-       for(i=0; i < num; i++)
+       for(i=0; i < num_fds; i++)
                close(fds[i].fd);
 
        free(fds);
diff --git a/perf_examples/task_smpl.c b/perf_examples/task_smpl.c
index e73d699..5acb731 100644
--- a/perf_examples/task_smpl.c
+++ b/perf_examples/task_smpl.c
@@ -57,8 +57,8 @@ typedef struct {
 
 static jmp_buf jbuf;
 static uint64_t collected_samples, lost_samples;
-static perf_event_desc_t *fds;
-static int num_events;
+static perf_event_desc_t *fds = NULL;
+static int num_fds = 0, max_fds = 0;
 static options_t options;
 static uint64_t sum_period;
 
@@ -293,7 +293,7 @@ display_sample(perf_event_desc_t *hw, struct perf_event_header *ehdr)
 
                                sz -= sizeof(grp);
 
-                               e = perf_id2event(fds, num_events, grp.id);
+                               e = perf_id2event(fds, num_fds, grp.id);
                                if (e == -1)
                                        str = "unknown sample event";
                                else
@@ -395,7 +395,7 @@ display_lost(perf_event_desc_t *hw)
        if (ret)
                errx(1, "cannot read lost info");
 
-       e = perf_id2event(fds, num_events, lost.id);
+       e = perf_id2event(fds, num_fds, lost.id);
        if (e == -1)
                str = "unknown lost event";
        else
@@ -486,8 +486,8 @@ mainloop(char **arg)
        /*
         * does allocate fds
         */
-       num_events = perf_setup_list_events(options.events, &fds);
-       if (num_events == -1)
+       ret  = perf_setup_list_events(options.events, &fds, &num_fds, &max_fds);
+       if (ret || (num_fds == 0))
                errx(1, "cannot setup event list");
 
        memset(pollfds, 0, sizeof(pollfds));
@@ -512,7 +512,7 @@ mainloop(char **arg)
                errx(1, "task %s [%d] exited already status %d\n", arg[0], pid, WEXITSTATUS(status));
 
        fds[0].fd = -1;
-       for(i=0; i < num_events; i++) {
+       for(i=0; i < num_fds; i++) {
 
                fds[i].hw.disabled = 0; /* start immediately */
 
@@ -538,7 +538,7 @@ mainloop(char **arg)
 
                        /* must get event id for SAMPLE_GROUP */
                        fds[i].hw.read_format = PERF_FORMAT_SCALE;
-                       if (num_events > 1)
+                       if (num_fds > 1)
                                fds[i].hw.read_format |= PERF_FORMAT_GROUP|PERF_FORMAT_ID;
                }
 
@@ -573,7 +573,7 @@ mainloop(char **arg)
         * We are skipping the first 3 values (nr, time_enabled, time_running)
         * and then for each event we get a pair of values.
         */
-       sz = (3+2*num_events)*sizeof(uint64_t);
+       sz = (3+2*num_fds)*sizeof(uint64_t);
        val = malloc(sz);
        if (!val)
                err(1, "cannot allocate memory");
@@ -583,7 +583,7 @@ mainloop(char **arg)
                err(1, "cannot read id %zu", sizeof(val));
 
 
-       for(i=0; i < num_events; i++) {
+       for(i=0; i < num_fds; i++) {
                fds[i].id = val[2*i+1+3];
                printf("%"PRIu64"  %s\n", fds[i].id, fds[i].name);
        }
@@ -617,7 +617,7 @@ terminate_session:
         */
        wait4(pid, &status, 0, NULL);
 
-       for(i=0; i < num_events; i++)
+       for(i=0; i < num_fds; i++)
                close(fds[i].fd);
 
        /* check for partial event buffer */
-- 
1.7.0.4


------------------------------------------------------------------------------
_______________________________________________
perfmon2-devel mailing list
perfmon2-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/perfmon2-devel

Reply via email to