On 5/4/2010 1:25 PM, Stephane Eranian wrote:
> Corey,
> 
> I reworked your patch a little bit. I promoted certain helper functions 
> to perf_util.h.
> 
> I did not really like the extra max_fds parameter to the perf_setup_*().

I was hesitant on that design decision too.. I thought about squirreling away 
those two values in a new data structure which contains both fds and num_fds, 
and max_fds, but at the time I felt like the ripple effect would be too high.  
The idea of sneaking it into the first element of the array didn't occur to me, 
but seems reasonable, and produces a smaller change impact.

> I thought it was too much to carry around. Instead, I stash this value
> into the fds[] first entry. I kept num_fds as a parameter, this way
> the return value is really an error code.

Ok, sounds good.

> I also updated the patch to
> the latest HEAD.

I'm not seeing the patch in the master branch.  Did you remember to push the 
commit up to sourceforge?  These are the two top commits as I see them:

elm3c4.beaverton.ibm.com:corey-401% git log -2
commit 6ae081883cc55f183d60041c12122cb1504d497d
Author: Stephane Eranian <eran...@gmail.com>
Date:   Mon May 3 11:41:58 2010 +0200

    make pfmlib_attr_desc_t use const char *

        To be consistent with events and also because that is
        how the compiler stores the name and description strings.

        Signed-off-by: Stephane Eranian <eran...@gmail.com>

commit 16e8542d9c9cda60c4f6733002028a107dd1ad38
Author: Stephane Eranian <eran...@gmail.com>
Date:   Mon May 3 11:14:56 2010 +0200

    enforce const char * for perf_util.c entry points

        The compiler does store initialized strings
        in read-only sections when you do:
        char *foo = "bar";
        Thus the correct definition is:
        const char *foo = "bar";

        Update all examples to use (or cast) to const char.

        Signed-off-by: Stephane Eranian <eran...@gmail.com>


- Corey


> index 9192df2..598be72 100644
> --- a/perf_examples/notify_group.c
> +++ b/perf_examples/notify_group.c
> @@ -44,7 +44,7 @@ typedef struct {
>  static volatile unsigned long notification_received;
> 
>  static perf_event_desc_t *fds;
> -static int num_events;
> +static int num_fds;
> 
>  static int buffer_pages = 1; /* size of buffer payload  (must be power of 2) 
> */
> 
> @@ -56,7 +56,7 @@ sigio_handler(int n, struct siginfo *info, struct 
> sigcontext *sc)
>       uint64_t ip;
>       int id, ret;
>       
> -     id = perf_fd2event(fds, num_events, info->si_fd);
> +     id = perf_fd2event(fds, num_fds, info->si_fd);
>       if (id == -1)
>               errx(1, "cannot find event for descriptor %d", info->si_fd);
> 
> @@ -129,15 +129,15 @@ main(int argc, char **argv)
>       /*
>        * allocates fd for us
>        */
> -     num_events = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES,"
> +     ret = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES,"
>                                      "PERF_COUNT_HW_CPU_CYCLES,"
>                                       "PERF_COUNT_HW_CPU_CYCLES",
> -                                     &fds);
> -     if (num_events < 1)
> +                                     &fds, &num_fds);
> +     if (ret || !num_fds)
>               exit(1);
> 
>       fds[0].fd = -1;
> -     for(i=0; i < num_events; i++) {
> +     for(i=0; i < num_fds; i++) {
>               /* want a notification for each sample added to the buffer */
>               fds[i].hw.disabled =  !!i;
>               printf("i=%d disabled=%d\n", i, fds[i].hw.disabled);
> @@ -180,7 +180,7 @@ main(int argc, char **argv)
>               fds[i].pgmsk = (buffer_pages * pgsz) - 1;
>       }
> 
> -     for(i=0; i < num_events; i++) {
> +     for(i=0; i < num_fds; i++) {
>               ret = ioctl(fds[i].fd, PERF_EVENT_IOC_REFRESH , 1);
>               if (ret == -1)
>                       err(1, "cannot refresh");
> @@ -194,7 +194,7 @@ error:
>       /*
>        * destroy our session
>        */
> -     for(i=0; i < num_events; i++)
> +     for(i=0; i < num_fds; i++)
>               close(fds[i].fd);
> 
>       free(fds);
> diff --git a/perf_examples/notify_self.c b/perf_examples/notify_self.c
> index 12dcfc9..ceee2cd 100644
> --- a/perf_examples/notify_self.c
> +++ b/perf_examples/notify_self.c
> @@ -40,8 +40,8 @@
> 
>  static volatile unsigned long notification_received;
> 
> -static perf_event_desc_t *fds;
> -static int num_events;
> +static perf_event_desc_t *fds = NULL;
> +static int num_fds = 0;
> 
>  static int buffer_pages = 1; /* size of buffer payload (must be power of 2)*/
> 
> @@ -79,7 +79,7 @@ print_sample(int id)
>               if (ret)
>                       errx(1, "cannot read grp");
> 
> -             e = perf_id2event(fds, num_events, grp.id);
> +             e = perf_id2event(fds, num_fds, grp.id);
>               if (e == -1)
>                       str = "unknown event";
>               else
> @@ -119,7 +119,7 @@ sigio_handler(int n, struct siginfo *info, void *uc)
>       if (info->si_code != POLL_HUP)
>               errx(1, "signal not generated by SIGIO");
> 
> -     id = perf_fd2event(fds, num_events, info->si_fd);
> +     id = perf_fd2event(fds, num_fds, info->si_fd);
>       if (id == -1)
>               errx(1, "no event associated with fd=%d", info->si_fd);
> 
> @@ -188,14 +188,14 @@ main(int argc, char **argv)
>       /*
>        * allocates fd for us
>        */
> -     num_events = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES,"
> +     ret = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES,"
>                                      "PERF_COUNT_HW_INSTRUCTIONS",
> -                                     &fds);
> -     if (num_events < 1)
> +                                     &fds, &num_fds);
> +     if (ret || (num_fds == 0))
>               exit(1);
> 
>       fds[0].fd = -1;
> -     for(i=0; i < num_events; i++) {
> +     for(i=0; i < num_fds; i++) {
> 
>               /* want a notification for every each added to the buffer */
>               fds[i].hw.disabled = !i;
> @@ -213,7 +213,7 @@ main(int argc, char **argv)
>                       err(1, "cannot attach event %s", fds[i].name);
>       }
>       
> -     sz = (3+2*num_events)*sizeof(uint64_t);
> +     sz = (3+2*num_fds)*sizeof(uint64_t);
>       val = malloc(sz);
>       if (!val)
>               err(1, "cannot allocated memory");
> @@ -245,7 +245,7 @@ main(int argc, char **argv)
>        * We are skipping the first 3 values (nr, time_enabled, time_running)
>        * and then for each event we get a pair of values.
>        */ 
> -     for(i=0; i < num_events; i++) {
> +     for(i=0; i < num_fds; i++) {
>               fds[i].id = val[2*i+1+3];
>               printf("%"PRIu64"  %s\n", fds[i].id, fds[i].name);
>       }
> @@ -295,7 +295,7 @@ main(int argc, char **argv)
>       /*
>        * destroy our session
>        */
> -     for(i=0; i < num_events; i++)
> +     for(i=0; i < num_fds; i++)
>               close(fds[i].fd);
> 
>       free(fds);
> diff --git a/perf_examples/perf_util.c b/perf_examples/perf_util.c
> index 4afc978..9c99b15 100644
> --- a/perf_examples/perf_util.c
> +++ b/perf_examples/perf_util.c
> @@ -31,69 +31,86 @@
>  #include <perfmon/pfmlib_perf_event.h>
>  #include "perf_util.h"
> 
> +/* the **fd parameter must point to a null pointer on the first call
> + * max_fds and num_fds must both point to a zero value on the first call
> + * The return value is success (0) vs. failure (non-zero)
> + */
>  int
> -perf_setup_argv_events(const char **argv, perf_event_desc_t **fd)
> +perf_setup_argv_events(const char **argv, perf_event_desc_t **fds, int 
> *num_fds)
>  {
> -     perf_event_desc_t *fdt = NULL;
> -     int num = 0, max_fd = 0, new_max;
> -     int ret;
> +     perf_event_desc_t *fd;
> +     int new_max, ret, num, max_fds;
> +     int group_leader;
> 
> -     if (!(argv && fd))
> +     if (!(argv && fds && num_fds))
>               return -1;
> 
> -     
> -     while(*argv) {
> -             if (num == max_fd) {
> +     fd = *fds;
> +     if (fd) {
> +             max_fds = fd[0].max_fds;
> +             if (max_fds < 2)
> +                     return -1;
> +             num = *num_fds;
> +     } else {
> +             max_fds = num = 0; /* bootstrap */
> +     }
> +     group_leader = num;
> 
> -                     if (!max_fd)
> +     while(*argv) {
> +             if (num == max_fds) {
> +                     if (max_fds == 0)
>                               new_max = 2;
>                       else
> -                             new_max = max_fd << 1;
> +                             new_max = max_fds << 1;
> 
> -                     if (new_max < max_fd) {
> +                     if (new_max < max_fds) {
>                               warn("too many entries");
>                               goto error;
>                       }
> -                     fdt = realloc(fdt, new_max * sizeof(*fdt));
> -                     if (!fdt) {
> +                     fd = realloc(fd, new_max * sizeof(*fd));
> +                     if (!fd) {
>                               warn("cannot allocate memory");
>                               goto error;
>                       }
>                       /* reset newly allocated chunk */
> -                     memset(fdt+max_fd, 0, (new_max - max_fd) * 
> sizeof(*fdt));
> -                     max_fd = new_max;
> +                     memset(fd + max_fds, 0, (new_max - max_fds) * 
> sizeof(*fd));
> +                     max_fds = new_max;
> +
> +                     /* update max size */
> +                     fd[0].max_fds = max_fds;
>               }
> 
> -             ret = pfm_get_perf_event_encoding(*argv, PFM_PLM3, 
> &fdt[num].hw, NULL, NULL);
> +             ret = pfm_get_perf_event_encoding(*argv, PFM_PLM3, &fd[num].hw, 
> NULL, NULL);
>               if (ret != PFM_SUCCESS) {
>                       warnx("event %s: %s\n", *argv, pfm_strerror(ret));
>                       goto error;
>               }
>               /* ABI compatibility */
> -             fdt[num].hw.size = sizeof(struct perf_event_attr);
> -             fdt[num].name = *argv;
> +             fd[num].hw.size = sizeof(struct perf_event_attr);
> +
> +             fd[num].name = *argv;
> +             fd[num].group_leader = group_leader;
>               num++;
>               argv++;
>       }
> -     *fd = fdt;
> -     return num;
> +     *num_fds = num;
> +     *fds = fd;
> +     return 0;
>  error:
> -     free(fdt);
> +     free(fd);
>       return -1;
>  }
> 
>  int
> -perf_setup_list_events(const char *ev, perf_event_desc_t **fd)
> +perf_setup_list_events(const char *ev, perf_event_desc_t **fd, int *num_fds)
>  {
>       const char **argv;
> -     char *p, *q;
> -     char *events;
> -     int num = 0, i, ret;
> +     char *p, *q, *events;
> +     int i, ret, num = 0;
> 
> -     if (!(ev && fd))
> +     if (!(ev && fd && num_fds))
>               return -1;
> 
> -     
>       events = strdup(ev);
>       if (!events)
>               return -1;
> @@ -105,7 +122,7 @@ perf_setup_list_events(const char *ev, perf_event_desc_t 
> **fd)
>       }
>       num++;
>       num++; /* terminator */
> -  
> +
>       argv = malloc(num * sizeof(char *));
>       if (!argv) {
>               free(events);
> @@ -119,12 +136,35 @@ perf_setup_list_events(const char *ev, 
> perf_event_desc_t **fd)
>       }
>       argv[i++] = q;
>       argv[i] = NULL;
> -     ret = perf_setup_argv_events(argv, fd);
> +     ret = perf_setup_argv_events(argv, fd, num_fds);
>       free(argv);
>       return ret;
>  }
> 
>  int
> +perf_get_group_nevents(perf_event_desc_t *fds, int num, int idx)
> +{
> +     int leader;
> +     int i;
> +
> +     if (idx < 0 || idx >= num)
> +             return 0;
> +
> +     leader = fds[idx].group_leader;
> +
> +     for (i = leader + 1; i < num; i++) {
> +             if (fds[i].group_leader != leader) {
> +                     /* This is a new group leader, so the previous
> +                      * event was the final event of the preceding
> +                      * group.
> +                      */
> +                     return i - leader;
> +             }
> +     }
> +     return 1;
> +}
> +
> +int
>  perf_read_buffer(struct perf_event_mmap_page *hdr, size_t pgmsk, void *buf, 
> size_t sz)
>  {
>       char *data;
> diff --git a/perf_examples/perf_util.h b/perf_examples/perf_util.h
> index 8f5222f..c65c003 100644
> --- a/perf_examples/perf_util.h
> +++ b/perf_examples/perf_util.h
> @@ -36,14 +36,16 @@ typedef struct {
>       uint64_t id; /* event id kernel */
>       void *buf;
>       size_t pgmsk;
> +     int group_leader;
>       int fd;
> +     int max_fds;
>  } perf_event_desc_t;
> 
>  /* handy shortcut */
>  #define PERF_FORMAT_SCALE 
> (PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING)
> 
> -extern int perf_setup_argv_events(const char **argv, perf_event_desc_t **fd);
> -extern int perf_setup_list_events(const char *events, perf_event_desc_t 
> **fd);
> +extern int perf_setup_argv_events(const char **argv, perf_event_desc_t **fd, 
> int *num_fds);
> +extern int perf_setup_list_events(const char *events, perf_event_desc_t 
> **fd, int *num_fds);
>  extern int perf_read_buffer(struct perf_event_mmap_page *hdr, size_t pgmsk, 
> void *buf, size_t sz);
>  extern void perf_skip_buffer(struct perf_event_mmap_page *hdr, size_t sz);
> 
> @@ -109,4 +111,13 @@ perf_id2event(perf_event_desc_t *fds, int num_events, 
> uint64_t id)
>                       return j;
>       return -1;
>  }
> +
> +static inline int
> +perf_is_group_leader(perf_event_desc_t *fds, int idx)
> +{
> +     return fds[idx].group_leader == idx;
> +}
> +
> +extern int perf_get_group_nevents(perf_event_desc_t *fds, int num, int 
> leader);
> +
>  #endif
> diff --git a/perf_examples/self.c b/perf_examples/self.c
> index 12ebcd4..11e3d85 100644
> --- a/perf_examples/self.c
> +++ b/perf_examples/self.c
> @@ -65,9 +65,9 @@ noploop(void)
>  int
>  main(int argc, char **argv)
>  {
> -     perf_event_desc_t *fds;
> +     perf_event_desc_t *fds = NULL;
>       uint64_t values[3];
> -     int i, ret, num;
> +     int i, ret, num_fds = 0;
> 
>       setlocale(LC_ALL, "");
>       /*
> @@ -77,12 +77,12 @@ main(int argc, char **argv)
>       if (ret != PFM_SUCCESS)
>               errx(1, "Cannot initialize library: %s", pfm_strerror(ret));
> 
> -     num = perf_setup_argv_events(argc > 1 ? (const char **)argv+1 : 
> gen_events, &fds);
> -     if (num == -1)
> +     ret = perf_setup_argv_events(argc > 1 ? (const char **)argv+1 : 
> gen_events, &fds, &num_fds);
> +     if (ret || !num_fds)
>               errx(1, "cannot setup events");
> 
>       fds[0].fd = -1;
> -     for(i=0; i < num; i++) {
> +     for(i=0; i < num_fds; i++) {
>               /* request timing information necessary for scaling */
>               fds[i].hw.read_format = PERF_FORMAT_SCALE;
> 
> @@ -97,8 +97,8 @@ main(int argc, char **argv)
>       signal(SIGALRM, sig_handler);
> 
>       /*
> -      * enable all counters attached to this thread and created by it
> -      */
> +      * enable all counters attached to this thread and created by it
> +      */
>       ret = prctl(PR_TASK_PERF_EVENTS_ENABLE);
>       if (ret)
>               err(1, "prctl(enable) failed");
> @@ -108,8 +108,8 @@ main(int argc, char **argv)
>       noploop();
> 
>       /*
> -      * disable all counters attached to this thread
> -      */
> +      * disable all counters attached to this thread
> +      */
>       ret = prctl(PR_TASK_PERF_EVENTS_DISABLE);
>       if (ret)
>               err(1, "prctl(disable) failed");
> @@ -121,7 +121,7 @@ main(int argc, char **argv)
>        */
>       memset(values, 0, sizeof(values));
> 
> -     for (i=0; i < num; i++) {
> +     for (i=0; i < num_fds; i++) {
>               uint64_t val;
>               double ratio;
> 
> diff --git a/perf_examples/self_count.c b/perf_examples/self_count.c
> index 48855fd..c77456d 100644
> --- a/perf_examples/self_count.c
> +++ b/perf_examples/self_count.c
> @@ -179,10 +179,10 @@ read_count(perf_event_desc_t *fds)
>  int
>  main(int argc, char **argv)
>  {
> -     perf_event_desc_t *fds;
> +     perf_event_desc_t *fds = NULL;
>       size_t pgsz;
>       uint64_t val;
> -     int i, ret, num;
> +     int i, ret, num_fds = 0;
>       int n = 30;
> 
>       pgsz = sysconf(_SC_PAGESIZE);
> @@ -193,12 +193,12 @@ main(int argc, char **argv)
>       if (ret != PFM_SUCCESS)
>               errx(1, "Cannot initialize library: %s", pfm_strerror(ret));
> 
> -     num = perf_setup_argv_events(argc > 1 ? (const char **)(argv+1) : 
> gen_events, &fds);
> -     if (num == -1)
> +     ret = perf_setup_argv_events(argc > 1 ? (const char **)argv+1 : 
> gen_events, &fds, &num_fds);
> +     if (ret || !num_fds)
>               errx(1, "cannot setup events");
> 
>       fds[0].fd = -1;
> -     for(i=0; i < num; i++) {
> +     for(i=0; i < num_fds; i++) {
>               /* request timing information necesaary for scaling */
>               fds[i].hw.read_format = PERF_FORMAT_SCALE;
>               fds[i].hw.disabled = 0;
> @@ -215,15 +215,15 @@ main(int argc, char **argv)
>       signal(SIGALRM, sig_handler);
> 
>       /*
> -      * enable all counters attached to this thread
> -      */
> +      * enable all counters attached to this thread
> +      */
>       ioctl(fds[0].fd, PERF_EVENT_IOC_ENABLE, 0);
> 
>       alarm(10);
> 
>       for(;quit == 0;) {
> -             
> -             for (i=0; i < num; i++) {
> +
> +             for (i=0; i < num_fds; i++) {
>                       val = read_count(&fds[i]);
>                       printf("%20"PRIu64" %s\n", val, fds[i].name);
>               }
> @@ -233,11 +233,11 @@ main(int argc, char **argv)
>                       n = 30;
>       }
>       /*
> -      * disable all counters attached to this thread
> -      */
> +      * disable all counters attached to this thread
> +      */
>       ioctl(fds[0].fd, PERF_EVENT_IOC_DISABLE, 0);
> 
> -     for (i=0; i < num; i++) {
> +     for (i=0; i < num_fds; i++) {
>               munmap(fds[i].buf, pgsz);
>               close(fds[i].fd);
>       }
> diff --git a/perf_examples/self_pipe.c b/perf_examples/self_pipe.c
> index 8fb919a..90a90ff 100644
> --- a/perf_examples/self_pipe.c
> +++ b/perf_examples/self_pipe.c
> @@ -85,8 +85,9 @@ static void
>  measure(void)
>  {
>       perf_event_desc_t *fds = NULL;
> +     int num_fds = 0;
>       uint64_t values[3];
> -     int i, ret, num;
> +     int i, ret;
>       int pr[2], pw[2];
>       ssize_t nbytes;
>       pid_t pid;
> @@ -109,11 +110,11 @@ measure(void)
>       if (ret)
>               err(1, "cannot create write pipe");
> 
> -     num = perf_setup_list_events(options.events, &fds);
> -     if (num < 1)
> +     ret = perf_setup_list_events(options.events, &fds, &num_fds);
> +     if (ret || !num_fds)
>               exit(1);
> 
> -     for(i=0; i < num; i++) {
> +     for(i=0; i < num_fds; i++) {
>               fds[i].hw.disabled = 1;
>               fds[i].hw.read_format = PERF_FORMAT_SCALE;
> 
> @@ -141,7 +142,7 @@ measure(void)
>                       err(1, "cannot create child\n");
>               case 0:
>                       /* do not inherit session fd */
> -                     for(i=0; i < num; i++)
> +                     for(i=0; i < num_fds; i++)
>                               close(fds[i].fd);
>                       /* pr[]: write master, read child */
>                       /* pw[]: read master, write child */
> @@ -170,7 +171,7 @@ measure(void)
> 
>       prctl(PR_TASK_PERF_EVENTS_DISABLE);
> 
> -     for(i=0; i < num; i++) {
> +     for(i=0; i < num_fds; i++) {
>               uint64_t val;
>               double ratio;
> 
> @@ -204,7 +205,7 @@ measure(void)
>       /*
>        * and destroy our session
>        */
> -     for(i=0; i < num; i++)
> +     for(i=0; i < num_fds; i++)
>               close(fds[i].fd);
> 
>       free(fds);
> diff --git a/perf_examples/self_smpl_multi.c b/perf_examples/self_smpl_multi.c
> index 30d6acb..0ca45f5 100644
> --- a/perf_examples/self_smpl_multi.c
> +++ b/perf_examples/self_smpl_multi.c
> @@ -77,7 +77,6 @@ static int program_time = PROGRAM_TIME;
>  static int threshold = THRESHOLD;
>  static int signum = SIGIO;
>  static pthread_barrier_t barrier;
> -static int num_events;
> 
> 
>  static int buffer_pages = 1;
> @@ -120,8 +119,9 @@ long bad_msg[MAX_THR];
>  long bad_restart[MAX_THR];
>  int fown;
> 
> -int __thread myid; /* TLS */
> +static int __thread myid; /* TLS */
>  static perf_event_desc_t __thread *fds; /* TLS */
> +static int __thread num_fds; /* TLS */
> 
>  pid_t
>  gettid(void)
> @@ -277,8 +277,10 @@ overflow_start(char *name)
>       size_t pgsz;
>       int ret, fd, flags;
> 
> -     num_events = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES", &fds);
> -     if (num_events != 1)
> +     fds = NULL;
> +     num_fds = 0;
> +     ret = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES", &fds, 
> &num_fds);
> +     if (ret || !num_fds)
>               errx(1, "cannot monitor event");
> 
>       pgsz = sysconf(_SC_PAGESIZE);
> diff --git a/perf_examples/syst.c b/perf_examples/syst.c
> index 429c6c4..a681e41 100644
> --- a/perf_examples/syst.c
> +++ b/perf_examples/syst.c
> @@ -43,22 +43,21 @@ typedef struct {
> 
>  static options_t options;
>  static perf_event_desc_t **all_fds;
> -static int num;
> +static int *num_fds;
> 
>  void
>  setup_cpu(int cpu)
>  {
> -     perf_event_desc_t *fds = NULL;
> -     int i;
> +     perf_event_desc_t *fds;
> +     int i, ret;
> 
> -     num = perf_setup_list_events(options.events, &fds);
> -     if (num == -1)
> +     ret = perf_setup_list_events(options.events, &all_fds[cpu], 
> &num_fds[cpu]);
> +     if (ret || (num_fds == 0))
>               errx(1, "cannot setup events\n");
> -
> -     all_fds[cpu] = fds;
> +     fds = all_fds[cpu]; /* temp */
> 
>       fds[0].fd = -1;
> -     for(i=0; i < num; i++) {
> +     for(i=0; i < num_fds[cpu]; i++) {
>               fds[i].hw.disabled = options.group ? !i : 1;
> 
>               if (options.excl && ((options.group && !i) || (!options.group)))
> @@ -92,10 +91,11 @@ measure(void)
>               cmax = cmin + 1;
>               ncpus = 1;
>       }
> -     all_fds = malloc(ncpus * sizeof(perf_event_desc_t));
> -     if (!all_fds)
> -             err(1, "cannot allocate memory for all_fds");
> +     all_fds = calloc(ncpus, sizeof(perf_event_desc_t));
> +     num_fds = calloc(ncpus, sizeof(int));
> 
> +     if (!all_fds || !num_fds)
> +             err(1, "cannot allocate memory for internal structures");
>       for(c=cmin ; c < cmax; c++)
>               setup_cpu(c);
> 
> @@ -106,7 +106,7 @@ measure(void)
>               fds = all_fds[c];
>               if (options.group) 
>                       ret = ioctl(fds[0].fd, PERF_EVENT_IOC_ENABLE, 0);
> -             else for(i=0; i < num; i++) {
> +             else for(i=0; i < num_fds[c]; i++) {
>                       ret = ioctl(fds[i].fd, PERF_EVENT_IOC_ENABLE, 0);
>                       if (ret)
>                               err(1, "cannot enable event %s\n", fds[i].name);
> @@ -121,7 +121,7 @@ measure(void)
>               puts("------------------------");
>               for(c = cmin; c < cmax; c++) {
>                       fds = all_fds[c];
> -                     for(i=0; i < num; i++) {
> +                     for(i=0; i < num_fds[c]; i++) {
>                               double ratio;
> 
>                               ret = read(fds[i].fd, values, sizeof(values));
> @@ -152,7 +152,7 @@ measure(void)
>       }
>       for(c = cmin; c < cmax; c++) {
>               fds = all_fds[c];
> -             for(i=0; i < num; i++)
> +             for(i=0; i < num_fds[c]; i++)
>                       close(fds[i].fd);
>       }
>       free(all_fds);
> diff --git a/perf_examples/task.c b/perf_examples/task.c
> index b29822a..6c37b84 100644
> --- a/perf_examples/task.c
> +++ b/perf_examples/task.c
> @@ -35,10 +35,13 @@
> 
>  #include "perf_util.h"
> 
> +#define MAX_GROUPS 16
> +
>  typedef struct {
> -     const char *events;
> +     const char *events[MAX_GROUPS];
> +     int num_groups;
> +     int format_group;
>       int inherit;
> -     int group;
>       int print;
>       int pin;
>       pid_t pid;
> @@ -59,11 +62,11 @@ child(char **arg)
>  }
> 
>  static void
> -read_group(perf_event_desc_t *fds, int num)
> +read_groups(perf_event_desc_t *fds, int num)
>  {
> -     uint64_t *values;
> -     size_t sz;
> -     int i, ret;
> +     uint64_t *values = NULL;
> +     size_t new_sz, sz = 0;
> +     int i, evt, ret;
> 
>       /*
>        *      { u64           nr;
> @@ -76,56 +79,55 @@ read_group(perf_event_desc_t *fds, int num)
>        *
>        * we do not use FORMAT_ID in this program
>        */
> -     sz = sizeof(uint64_t) * (3 + num);
> -     values = malloc(sz);
> -     if (!values)
> -             err(1, "cannot allocate memory for values\n");
> -
> -     ret = read(fds[0].fd, values, sz);
> -     if (ret != sz) { /* unsigned */
> -             if (ret == -1)
> -                     err(1, "cannot read values event %s", fds[0].name);
> -             else    /* likely pinned and could not be loaded */
> -                     warnx("could not read event0 ret=%d", ret);
> -     }
> 
> -     /*
> -      * propagate to save area
> -      */
> -     for(i=0; i < num; i++) {
> -             values[0] = values[3+i];
> -             /*
> -              * scaling because we may be sharing the PMU and
> -              * thus may be multiplexed
> -              */
> -             fds[i].prev_value = fds[i].value;
> -             fds[i].value = perf_scale(values);
> -             fds[i].enabled = values[1];
> -             fds[i].running = values[2];
> -     }
> -     free(values);
> -}
> +     for (evt = 0; evt < num; ) {
> +             int num_evts_to_read;
> 
> -static void
> -read_single(perf_event_desc_t *fds, int num)
> -{
> -     uint64_t values[3];
> -     int i, ret;
> +             if (options.format_group) {
> +                     num_evts_to_read = perf_get_group_nevents(fds, num, 
> evt);
> +                     new_sz = sizeof(uint64_t) * (3 + num_evts_to_read);
> +             } else {
> +                     num_evts_to_read = 1;
> +                     new_sz = sizeof(uint64_t) * 3;
> +             }
> 
> -     for(i=0; i < num; i++) {
> +             if (new_sz > sz) {
> +                     sz = new_sz;
> +                     values = realloc(values, sz);
> +             }
> 
> -             ret = read(fds[i].fd, values, sizeof(values));
> -             if (ret != sizeof(values)) { /* unsigned */
> +             if (!values)
> +                     err(1, "cannot allocate memory for values\n");
> +
> +             ret = read(fds[evt].fd, values, sz);
> +             if (ret != sz) { /* unsigned */
>                       if (ret == -1)
> -                             err(1, "cannot read values event %s", 
> fds[i].name);
> -                     else    /* likely pinned and could not be loaded */
> -                             warnx("could not read event%d", i);
> +                             err(1, "cannot read values event %s", 
> fds[0].name);
> +
> +                     /* likely pinned and could not be loaded */
> +                     warnx("could not read event %d, tried to read %d bytes, 
> but got %d",
> +                             evt, (int)sz, ret);
> +             }
> +
> +             /*
> +              * propagate to save area
> +              */
> +             for (i = evt; i < (evt + num_evts_to_read); i++) {
> +                     if (options.format_group)
> +                             values[0] = values[3 + (i - evt)];
> +                     /*
> +                      * scaling because we may be sharing the PMU and
> +                      * thus may be multiplexed
> +                      */
> +                     fds[i].prev_value = fds[i].value;
> +                     fds[i].value = perf_scale(values);
> +                     fds[i].enabled = values[1];
> +                     fds[i].running = values[2];
>               }
> -             fds[i].prev_value = fds[i].value;
> -             fds[i].value = perf_scale(values);
> -             fds[i].enabled = values[1];
> -             fds[i].running = values[2];
> +             evt += num_evts_to_read;
>       }
> +     if (values)
> +             free(values);
>  }
> 
>  static void
> @@ -133,21 +135,21 @@ print_counts(perf_event_desc_t *fds, int num)
>  {
>       int i;
> 
> -     if (options.group)
> -             read_group(fds, num);
> -     else
> -             read_single(fds, num);
> +     read_groups(fds, num);
> 
>       for(i=0; i < num; i++) {
>               double ratio;
>               uint64_t val;
> 
>               val = fds[i].value - fds[i].prev_value;
> -
>               ratio = 0.0;
>               if (fds[i].enabled)
>                       ratio = 1.0 * fds[i].running / fds[i].enabled;
> 
> +             /* separate groups */
> +             if (i && fds[i].hw.enable_on_exec)
> +                     putchar('\n');
> +
>               if (ratio == 1.0)
>                       printf("%'20"PRIu64" %s (%'"PRIu64" : %'"PRIu64")\n", 
> val, fds[i].name, fds[i].enabled, fds[i].running);
>               else
> @@ -167,8 +169,8 @@ static void sig_handler(int n)
>  int
>  parent(char **arg)
>  {
> -     perf_event_desc_t *fds;
> -     int status, ret, i, num;
> +     perf_event_desc_t *fds = NULL;
> +     int status, ret, i, num_fds = 0, grp, group_fd;
>       int ready[2], go[2];
>       char buf;
>       pid_t pid;
> @@ -176,9 +178,12 @@ parent(char **arg)
>       if (pfm_initialize() != PFM_SUCCESS)
>               errx(1, "libpfm initialization failed");
> 
> -     num = perf_setup_list_events(options.events, &fds);
> -     if (num < 1)
> -             exit(1);
> +     for (grp = 0; grp < options.num_groups; grp++) {
> +             int ret;
> +             ret = perf_setup_list_events(options.events[grp], &fds, 
> &num_fds);
> +             if (ret || !num_fds)
> +                     exit(1);
> +     }
> 
>       pid = options.pid;
>       if (!pid) {
> @@ -232,37 +237,43 @@ parent(char **arg)
>       }
> 
>       fds[0].fd = -1;
> -     for(i=0; i < num; i++) {
> +     for(i=0; i < num_fds; i++) {
> +             int is_group_leader; /* boolean */
> +
> +             is_group_leader = perf_is_group_leader(fds, i);
> +             if (is_group_leader) {
> +                     /* this is the group leader */
> +                     group_fd = -1;
> +             } else {
> +                     group_fd = fds[fds[i].group_leader].fd;
> +             }
> +
>               /*
>                * create leader disabled with enable_on-exec
>                */
>               if (!options.pid) {
> -                     if (options.group) {
> -                             fds[i].hw.disabled = !i;
> -                             fds[i].hw.enable_on_exec = !i;
> -                     } else {
> -                             fds[i].hw.disabled = 1;
> -                             fds[i].hw.enable_on_exec = 1;
> -                     }
> +                     fds[i].hw.disabled = is_group_leader;
> +                     fds[i].hw.enable_on_exec = is_group_leader;
>               }
> 
>               fds[i].hw.read_format = PERF_FORMAT_SCALE;
>               /* request timing information necessary for scaling counts */
> -             if (!i && options.group)
> -                     fds[0].hw.read_format |= PERF_FORMAT_GROUP;
> +             if (is_group_leader && options.format_group)
> +                     fds[i].hw.read_format |= PERF_FORMAT_GROUP;
> 
>               if (options.inherit)
>                       fds[i].hw.inherit = 1;
> 
> -             if (options.pin && ((options.group && i== 0) || 
> (!options.group)))
> +             if (options.pin && is_group_leader)
>                       fds[i].hw.pinned = 1;
> -
> -             fds[i].fd = perf_event_open(&fds[i].hw, pid, -1, options.group 
> ? fds[0].fd : -1, 0);
> +             fds[i].fd = perf_event_open(&fds[i].hw, pid, -1, group_fd, 0);
>               if (fds[i].fd == -1) {
>                       warn("cannot attach event%d %s", i, fds[i].name);
>                       goto error;
>               }
> -     }       
> +     }
> +     ioctl(fds[0].fd, PERF_EVENT_IOC_DISABLE, 0);
> +
> 
>       if (!options.pid)
>               close(go[1]);
> @@ -271,12 +282,12 @@ parent(char **arg)
>               if (!options.pid) {
>                       while(waitpid(pid, &status, WNOHANG) == 0) {
>                               sleep(1);
> -                             print_counts(fds, num);
> +                             print_counts(fds, num_fds);
>                       }
>               } else {
>                       while(quit == 0) {
>                               sleep(1);
> -                             print_counts(fds, num);
> +                             print_counts(fds, num_fds);
>                       }
>               }
>       } else {
> @@ -284,10 +295,10 @@ parent(char **arg)
>                       waitpid(pid, &status, 0);
>               else
>                       pause();
> -             print_counts(fds, num);
> +             print_counts(fds, num_fds);
>       }
> 
> -     for(i=0; i < num; i++)
> +     for(i=0; i < num_fds; i++)
>               close(fds[i].fd);
> 
>       free(fds);
> @@ -305,11 +316,11 @@ usage(void)
>       printf("usage: task [-h] [-i] [-g] [-p] [-P] [-t pid] [-e 
> event1,event2,...] cmd\n"
>               "-h\t\tget help\n"
>               "-i\t\tinherit across fork\n"
> -             "-g\t\tgroup events\n"
> +             "-f\t\tuse PERF_FORMAT_GROUP for reading up counts 
> (experimental, not working)\n"
>               "-p\t\tprint counts every second\n"
>               "-P\t\tpin events\n"
>               "-t pid\tmeasure existing pid\n"
> -             "-e ev,ev\tlist of events to measure\n"
> +             "-e ev,ev\tgroup of events to measure (multiple -e switches are 
> allowed)\n"
>               );
>  }
> 
> @@ -320,13 +331,18 @@ main(int argc, char **argv)
> 
>       setlocale(LC_ALL, "");
> 
> -     while ((c=getopt(argc, argv,"he:igpPt:")) != -1) {
> +     while ((c=getopt(argc, argv,"he:ifpPt:")) != -1) {
>               switch(c) {
>                       case 'e':
> -                             options.events = optarg;
> +                             if (options.num_groups < MAX_GROUPS) {
> +                                     options.events[options.num_groups++] = 
> optarg;
> +                             } else {
> +                                     errx(1, "you cannot specify more than 
> %d groups.\n",
> +                                             MAX_GROUPS);
> +                             }
>                               break;
> -                     case 'g':
> -                             options.group = 1;
> +                     case 'f':
> +                             options.format_group = 1;
>                               break;
>                       case 'p':
>                               options.print = 1;
> @@ -347,12 +363,13 @@ main(int argc, char **argv)
>                               errx(1, "unknown error");
>               }
>       }
> -     if (!options.events)
> -             options.events = 
> "PERF_COUNT_HW_CPU_CYCLES,PERF_COUNT_HW_INSTRUCTIONS";
> -
> +     if (options.num_groups == 0) {
> +             options.events[0] = 
> "PERF_COUNT_HW_CPU_CYCLES,PERF_COUNT_HW_INSTRUCTIONS";
> +             options.num_groups = 1;
> +     }
>       if (!argv[optind] && !options.pid)
>               errx(1, "you must specify a command to execute or a thread to 
> attach to\n");
> -     
> +
>       signal(SIGINT, sig_handler);
> 
>       return parent(argv+optind);
> diff --git a/perf_examples/task_attach_timeout.c 
> b/perf_examples/task_attach_timeout.c
> index bfcb60d..a3bffb0 100644
> --- a/perf_examples/task_attach_timeout.c
> +++ b/perf_examples/task_attach_timeout.c
> @@ -95,19 +95,19 @@ print_counts(perf_event_desc_t *fds, int num, int 
> do_delta)
>  int
>  measure(pid_t pid)
>  {
> -     perf_event_desc_t *fds;
> -     int i, num;
> +     perf_event_desc_t *fds = NULL;
> +     int i, ret, num_fds = 0;
>       char fn[32];
> 
>       if (pfm_initialize() != PFM_SUCCESS)
>               errx(1, "libpfm initialization failed\n");
> 
> -     num = perf_setup_list_events(options.events, &fds);
> -     if (num < 1)
> +     ret = perf_setup_list_events(options.events, &fds, &num_fds);
> +     if (ret || (num_fds == 0))
>               exit(1);
> 
>       fds[0].fd = -1;
> -     for(i=0; i < num; i++) {
> +     for(i=0; i < num_fds; i++) {
>               fds[i].hw.disabled = 0; /* start immediately */
> 
>               /* request timing information necessary for scaling counts */
> @@ -130,15 +130,15 @@ measure(pid_t pid)
>               sleep(1);
>               options.delay--;
>               if (options.print)
> -                     print_counts(fds, num, 1);
> +                     print_counts(fds, num_fds, 1);
>       }
>       if (options.delay)
>               warn("thread %d terminated before timeout", pid);
> 
>       if (!options.print)
> -             print_counts(fds, num, 0);
> +             print_counts(fds, num_fds, 0);
> 
> -     for(i=0; i < num; i++)
> +     for(i=0; i < num_fds; i++)
>               close(fds[i].fd);
> 
>       free(fds);
> diff --git a/perf_examples/task_smpl.c b/perf_examples/task_smpl.c
> index 7d029db..2ceeb01 100644
> --- a/perf_examples/task_smpl.c
> +++ b/perf_examples/task_smpl.c
> @@ -58,7 +58,7 @@ typedef struct {
>  static jmp_buf jbuf;
>  static uint64_t collected_samples, lost_samples;
>  static perf_event_desc_t *fds;
> -static int num_events;
> +static int num_fds;
>  static options_t options;
>  static uint64_t sum_period;
> 
> @@ -293,7 +293,7 @@ display_sample(perf_event_desc_t *hw, struct 
> perf_event_header *ehdr)
> 
>                               sz -= sizeof(grp);
> 
> -                             e = perf_id2event(fds, num_events, grp.id);
> +                             e = perf_id2event(fds, num_fds, grp.id);
>                               if (e == -1)
>                                       str = "unknown sample event";
>                               else
> @@ -395,7 +395,7 @@ display_lost(perf_event_desc_t *hw)
>       if (ret)
>               errx(1, "cannot read lost info");
> 
> -     e = perf_id2event(fds, num_events, lost.id);
> +     e = perf_id2event(fds, num_fds, lost.id);
>       if (e == -1)
>               str = "unknown lost event";
>       else
> @@ -486,8 +486,8 @@ mainloop(char **arg)
>       /*
>        * does allocate fds
>        */
> -     num_events = perf_setup_list_events(options.events, &fds);
> -     if (num_events == -1)
> +     ret  = perf_setup_list_events(options.events, &fds, &num_fds);
> +     if (ret || !num_fds)
>               errx(1, "cannot setup event list");
> 
>       memset(pollfds, 0, sizeof(pollfds));
> @@ -512,7 +512,7 @@ mainloop(char **arg)
>               errx(1, "task %s [%d] exited already status %d\n", arg[0], pid, 
> WEXITSTATUS(status));
> 
>       fds[0].fd = -1;
> -     for(i=0; i < num_events; i++) {
> +     for(i=0; i < num_fds; i++) {
> 
>               fds[i].hw.disabled = 0; /* start immediately */
> 
> @@ -538,7 +538,7 @@ mainloop(char **arg)
> 
>                       /* must get event id for SAMPLE_GROUP */
>                       fds[i].hw.read_format = PERF_FORMAT_SCALE;
> -                     if (num_events > 1)
> +                     if (num_fds > 1)
>                               fds[i].hw.read_format |= 
> PERF_FORMAT_GROUP|PERF_FORMAT_ID;
>               }
> 
> @@ -573,7 +573,7 @@ mainloop(char **arg)
>        * We are skipping the first 3 values (nr, time_enabled, time_running)
>        * and then for each event we get a pair of values.
>        */
> -     sz = (3+2*num_events)*sizeof(uint64_t);
> +     sz = (3+2*num_fds)*sizeof(uint64_t);
>       val = malloc(sz);
>       if (!val)
>               err(1, "cannot allocate memory");
> @@ -583,7 +583,7 @@ mainloop(char **arg)
>               err(1, "cannot read id %zu", sizeof(val));
> 
> 
> -     for(i=0; i < num_events; i++) {
> +     for(i=0; i < num_fds; i++) {
>               fds[i].id = val[2*i+1+3];
>               printf("%"PRIu64"  %s\n", fds[i].id, fds[i].name);
>       }
> @@ -617,7 +617,7 @@ terminate_session:
>        */
>       wait4(pid, &status, 0, NULL);
> 
> -     for(i=0; i < num_events; i++)
> +     for(i=0; i < num_fds; i++)
>               close(fds[i].fd);
> 
>       /* check for partial event buffer */
> diff --git a/perf_examples/x86/bts_smpl.c b/perf_examples/x86/bts_smpl.c
> index 46237bd..b28fd06 100644
> --- a/perf_examples/x86/bts_smpl.c
> +++ b/perf_examples/x86/bts_smpl.c
> @@ -59,7 +59,7 @@ typedef struct {
>  static jmp_buf jbuf;
>  static uint64_t collected_samples, lost_samples;
>  static perf_event_desc_t *fds;
> -static int num_events;
> +static int num_fds;
>  static options_t options;
> 
>  static struct option the_options[]={
> @@ -172,7 +172,7 @@ display_lost(perf_event_desc_t *hw)
>       if (ret)
>               errx(1, "cannot read lost info");
> 
> -     e = perf_id2event(fds, num_events, lost.id);
> +     e = perf_id2event(fds, num_fds, lost.id);
>       if (e == -1)
>               str = "unknown lost event";
>       else
> @@ -239,8 +239,8 @@ mainloop(char **arg)
>       /*
>        * does allocate fds
>        */
> -     num_events = 
> perf_setup_list_events("PERF_COUNT_HW_BRANCH_INSTRUCTIONS", &fds);
> -     if (num_events == -1)
> +     ret = perf_setup_list_events("PERF_COUNT_HW_BRANCH_INSTRUCTIONS", &fds, 
> &num_fds);
> +     if (ret || !num_fds)
>               errx(1, "cannot setup event");
> 
>       memset(pollfds, 0, sizeof(pollfds));
> diff --git a/perf_examples/x86/pebs_smpl.c b/perf_examples/x86/pebs_smpl.c
> index 83ba098..f24aa44 100644
> --- a/perf_examples/x86/pebs_smpl.c
> +++ b/perf_examples/x86/pebs_smpl.c
> @@ -54,7 +54,7 @@ typedef struct {
>  static jmp_buf jbuf;
>  static uint64_t collected_samples, lost_samples;
>  static perf_event_desc_t *fds;
> -static int num_events;
> +static int num_fds;
>  static options_t options;
>  static uint64_t sum_period;
> 
> @@ -353,7 +353,7 @@ display_sample(perf_event_desc_t *hw, struct 
> perf_event_header *ehdr)
> 
>                       sz -= sizeof(grp);
> 
> -                     e = perf_id2event(fds, num_events, grp.id);
> +                     e = perf_id2event(fds, num_fds, grp.id);
>                       if (e == -1)
>                               str = "unknown sample event";
>                       else
> @@ -419,7 +419,7 @@ display_lost(perf_event_desc_t *hw)
>       if (ret)
>               errx(1, "cannot read lost info");
> 
> -     e = perf_id2event(fds, num_events, lost.id);
> +     e = perf_id2event(fds, num_fds, lost.id);
>       if (e == -1)
>               str = "unknown lost event";
>       else
> @@ -510,8 +510,8 @@ mainloop(char **arg)
>       /*
>        * does allocate fds
>        */
> -     num_events = perf_setup_list_events(options.events, &fds);
> -     if (num_events == -1)
> +     ret = perf_setup_list_events(options.events, &fds, &num_fds);
> +     if (ret || !num_fds)
>               errx(1, "cannot setup event list");
> 
>       memset(pollfds, 0, sizeof(pollfds));
> @@ -536,7 +536,7 @@ mainloop(char **arg)
>               errx(1, "task %s [%d] exited already status %d\n", arg[0], pid, 
> WEXITSTATUS(status));
> 
>       fds[0].fd = -1;
> -     for(i=0; i < num_events; i++) {
> +     for(i=0; i < num_fds; i++) {
> 
>               fds[i].hw.disabled = 0; /* start immediately */
> 
> @@ -595,7 +595,7 @@ mainloop(char **arg)
>        * We are skipping the first 3 values (nr, time_enabled, time_running)
>        * and then for each event we get a pair of values.
>        */
> -     sz = (3+2*num_events)*sizeof(uint64_t);
> +     sz = (3+2*num_fds)*sizeof(uint64_t);
>       val = malloc(sz);
>       if (!val)
>               err(1, "cannot allocated memory");
> @@ -605,7 +605,7 @@ mainloop(char **arg)
>               err(1, "cannot read id %zu", sizeof(val));
> 
> 
> -     for(i=0; i < num_events; i++) {
> +     for(i=0; i < num_fds; i++) {
>               fds[i].id = val[2*i+1+3];
>               printf("%"PRIu64"  %s\n", fds[i].id, fds[i].name);
>       }
> @@ -639,7 +639,7 @@ terminate_session:
>        */
>       wait4(pid, &status, 0, NULL);
> 
> -     for(i=0; i < num_events; i++)
> +     for(i=0; i < num_fds; i++)
>               close(fds[i].fd);
> 
>       /* check for partial event buffer */

-- 
Regards,

- Corey

Corey Ashford
Software Engineer
IBM Linux Technology Center, Linux Toolchain
Beaverton, OR 
503-578-3507 
cjash...@us.ibm.com


------------------------------------------------------------------------------
_______________________________________________
perfmon2-devel mailing list
perfmon2-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/perfmon2-devel

Reply via email to