On 5/4/2010 1:25 PM, Stephane Eranian wrote: > Corey, > > I reworked your patch a little bit. I promoted certain helper functions > to perf_util.h. > > I did not really like the extra max_fds parameter to the perf_setup_*().
I was hesitant on that design decision too.. I thought about squirreling away those two values in a new data structure which contains both fds and num_fds, and max_fds, but at the time I felt like the ripple effect would be too high. The idea of sneaking it into the first element of the array didn't occur to me, but seems reasonable, and produces a smaller change impact. > I thought it was too much to carry around. Instead, I stash this value > into the fds[] first entry. I kept num_fds as a parameter, this way > the return value is really an error code. Ok, sounds good. > I also updated the patch to > the latest HEAD. I'm not seeing the patch in the master branch. Did you remember to push the commit up to sourceforge? These are the two top commits as I see them: elm3c4.beaverton.ibm.com:corey-401% git log -2 commit 6ae081883cc55f183d60041c12122cb1504d497d Author: Stephane Eranian <eran...@gmail.com> Date: Mon May 3 11:41:58 2010 +0200 make pfmlib_attr_desc_t use const char * To be consistent with events and also because that is how the compiler stores the name and description strings. Signed-off-by: Stephane Eranian <eran...@gmail.com> commit 16e8542d9c9cda60c4f6733002028a107dd1ad38 Author: Stephane Eranian <eran...@gmail.com> Date: Mon May 3 11:14:56 2010 +0200 enforce const char * for perf_util.c entry points The compiler does store initialized strings in read-only sections when you do: char *foo = "bar"; Thus the correct definition is: const char *foo = "bar"; Update all examples to use (or cast) to const char. Signed-off-by: Stephane Eranian <eran...@gmail.com> - Corey > index 9192df2..598be72 100644 > --- a/perf_examples/notify_group.c > +++ b/perf_examples/notify_group.c > @@ -44,7 +44,7 @@ typedef struct { > static volatile unsigned long notification_received; > > static perf_event_desc_t *fds; > -static int num_events; > +static int num_fds; > > static int buffer_pages = 1; /* size of buffer payload (must be power of 2) > */ > > @@ -56,7 +56,7 @@ sigio_handler(int n, struct siginfo *info, struct > sigcontext *sc) > uint64_t ip; > int id, ret; > > - id = perf_fd2event(fds, num_events, info->si_fd); > + id = perf_fd2event(fds, num_fds, info->si_fd); > if (id == -1) > errx(1, "cannot find event for descriptor %d", info->si_fd); > > @@ -129,15 +129,15 @@ main(int argc, char **argv) > /* > * allocates fd for us > */ > - num_events = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES," > + ret = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES," > "PERF_COUNT_HW_CPU_CYCLES," > "PERF_COUNT_HW_CPU_CYCLES", > - &fds); > - if (num_events < 1) > + &fds, &num_fds); > + if (ret || !num_fds) > exit(1); > > fds[0].fd = -1; > - for(i=0; i < num_events; i++) { > + for(i=0; i < num_fds; i++) { > /* want a notification for each sample added to the buffer */ > fds[i].hw.disabled = !!i; > printf("i=%d disabled=%d\n", i, fds[i].hw.disabled); > @@ -180,7 +180,7 @@ main(int argc, char **argv) > fds[i].pgmsk = (buffer_pages * pgsz) - 1; > } > > - for(i=0; i < num_events; i++) { > + for(i=0; i < num_fds; i++) { > ret = ioctl(fds[i].fd, PERF_EVENT_IOC_REFRESH , 1); > if (ret == -1) > err(1, "cannot refresh"); > @@ -194,7 +194,7 @@ error: > /* > * destroy our session > */ > - for(i=0; i < num_events; i++) > + for(i=0; i < num_fds; i++) > close(fds[i].fd); > > free(fds); > diff --git a/perf_examples/notify_self.c b/perf_examples/notify_self.c > index 12dcfc9..ceee2cd 100644 > --- a/perf_examples/notify_self.c > +++ b/perf_examples/notify_self.c > @@ -40,8 +40,8 @@ > > static volatile unsigned long notification_received; > > -static perf_event_desc_t *fds; > -static int num_events; > +static perf_event_desc_t *fds = NULL; > +static int num_fds = 0; > > static int buffer_pages = 1; /* size of buffer payload (must be power of 2)*/ > > @@ -79,7 +79,7 @@ print_sample(int id) > if (ret) > errx(1, "cannot read grp"); > > - e = perf_id2event(fds, num_events, grp.id); > + e = perf_id2event(fds, num_fds, grp.id); > if (e == -1) > str = "unknown event"; > else > @@ -119,7 +119,7 @@ sigio_handler(int n, struct siginfo *info, void *uc) > if (info->si_code != POLL_HUP) > errx(1, "signal not generated by SIGIO"); > > - id = perf_fd2event(fds, num_events, info->si_fd); > + id = perf_fd2event(fds, num_fds, info->si_fd); > if (id == -1) > errx(1, "no event associated with fd=%d", info->si_fd); > > @@ -188,14 +188,14 @@ main(int argc, char **argv) > /* > * allocates fd for us > */ > - num_events = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES," > + ret = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES," > "PERF_COUNT_HW_INSTRUCTIONS", > - &fds); > - if (num_events < 1) > + &fds, &num_fds); > + if (ret || (num_fds == 0)) > exit(1); > > fds[0].fd = -1; > - for(i=0; i < num_events; i++) { > + for(i=0; i < num_fds; i++) { > > /* want a notification for every each added to the buffer */ > fds[i].hw.disabled = !i; > @@ -213,7 +213,7 @@ main(int argc, char **argv) > err(1, "cannot attach event %s", fds[i].name); > } > > - sz = (3+2*num_events)*sizeof(uint64_t); > + sz = (3+2*num_fds)*sizeof(uint64_t); > val = malloc(sz); > if (!val) > err(1, "cannot allocated memory"); > @@ -245,7 +245,7 @@ main(int argc, char **argv) > * We are skipping the first 3 values (nr, time_enabled, time_running) > * and then for each event we get a pair of values. > */ > - for(i=0; i < num_events; i++) { > + for(i=0; i < num_fds; i++) { > fds[i].id = val[2*i+1+3]; > printf("%"PRIu64" %s\n", fds[i].id, fds[i].name); > } > @@ -295,7 +295,7 @@ main(int argc, char **argv) > /* > * destroy our session > */ > - for(i=0; i < num_events; i++) > + for(i=0; i < num_fds; i++) > close(fds[i].fd); > > free(fds); > diff --git a/perf_examples/perf_util.c b/perf_examples/perf_util.c > index 4afc978..9c99b15 100644 > --- a/perf_examples/perf_util.c > +++ b/perf_examples/perf_util.c > @@ -31,69 +31,86 @@ > #include <perfmon/pfmlib_perf_event.h> > #include "perf_util.h" > > +/* the **fd parameter must point to a null pointer on the first call > + * max_fds and num_fds must both point to a zero value on the first call > + * The return value is success (0) vs. failure (non-zero) > + */ > int > -perf_setup_argv_events(const char **argv, perf_event_desc_t **fd) > +perf_setup_argv_events(const char **argv, perf_event_desc_t **fds, int > *num_fds) > { > - perf_event_desc_t *fdt = NULL; > - int num = 0, max_fd = 0, new_max; > - int ret; > + perf_event_desc_t *fd; > + int new_max, ret, num, max_fds; > + int group_leader; > > - if (!(argv && fd)) > + if (!(argv && fds && num_fds)) > return -1; > > - > - while(*argv) { > - if (num == max_fd) { > + fd = *fds; > + if (fd) { > + max_fds = fd[0].max_fds; > + if (max_fds < 2) > + return -1; > + num = *num_fds; > + } else { > + max_fds = num = 0; /* bootstrap */ > + } > + group_leader = num; > > - if (!max_fd) > + while(*argv) { > + if (num == max_fds) { > + if (max_fds == 0) > new_max = 2; > else > - new_max = max_fd << 1; > + new_max = max_fds << 1; > > - if (new_max < max_fd) { > + if (new_max < max_fds) { > warn("too many entries"); > goto error; > } > - fdt = realloc(fdt, new_max * sizeof(*fdt)); > - if (!fdt) { > + fd = realloc(fd, new_max * sizeof(*fd)); > + if (!fd) { > warn("cannot allocate memory"); > goto error; > } > /* reset newly allocated chunk */ > - memset(fdt+max_fd, 0, (new_max - max_fd) * > sizeof(*fdt)); > - max_fd = new_max; > + memset(fd + max_fds, 0, (new_max - max_fds) * > sizeof(*fd)); > + max_fds = new_max; > + > + /* update max size */ > + fd[0].max_fds = max_fds; > } > > - ret = pfm_get_perf_event_encoding(*argv, PFM_PLM3, > &fdt[num].hw, NULL, NULL); > + ret = pfm_get_perf_event_encoding(*argv, PFM_PLM3, &fd[num].hw, > NULL, NULL); > if (ret != PFM_SUCCESS) { > warnx("event %s: %s\n", *argv, pfm_strerror(ret)); > goto error; > } > /* ABI compatibility */ > - fdt[num].hw.size = sizeof(struct perf_event_attr); > - fdt[num].name = *argv; > + fd[num].hw.size = sizeof(struct perf_event_attr); > + > + fd[num].name = *argv; > + fd[num].group_leader = group_leader; > num++; > argv++; > } > - *fd = fdt; > - return num; > + *num_fds = num; > + *fds = fd; > + return 0; > error: > - free(fdt); > + free(fd); > return -1; > } > > int > -perf_setup_list_events(const char *ev, perf_event_desc_t **fd) > +perf_setup_list_events(const char *ev, perf_event_desc_t **fd, int *num_fds) > { > const char **argv; > - char *p, *q; > - char *events; > - int num = 0, i, ret; > + char *p, *q, *events; > + int i, ret, num = 0; > > - if (!(ev && fd)) > + if (!(ev && fd && num_fds)) > return -1; > > - > events = strdup(ev); > if (!events) > return -1; > @@ -105,7 +122,7 @@ perf_setup_list_events(const char *ev, perf_event_desc_t > **fd) > } > num++; > num++; /* terminator */ > - > + > argv = malloc(num * sizeof(char *)); > if (!argv) { > free(events); > @@ -119,12 +136,35 @@ perf_setup_list_events(const char *ev, > perf_event_desc_t **fd) > } > argv[i++] = q; > argv[i] = NULL; > - ret = perf_setup_argv_events(argv, fd); > + ret = perf_setup_argv_events(argv, fd, num_fds); > free(argv); > return ret; > } > > int > +perf_get_group_nevents(perf_event_desc_t *fds, int num, int idx) > +{ > + int leader; > + int i; > + > + if (idx < 0 || idx >= num) > + return 0; > + > + leader = fds[idx].group_leader; > + > + for (i = leader + 1; i < num; i++) { > + if (fds[i].group_leader != leader) { > + /* This is a new group leader, so the previous > + * event was the final event of the preceding > + * group. > + */ > + return i - leader; > + } > + } > + return 1; > +} > + > +int > perf_read_buffer(struct perf_event_mmap_page *hdr, size_t pgmsk, void *buf, > size_t sz) > { > char *data; > diff --git a/perf_examples/perf_util.h b/perf_examples/perf_util.h > index 8f5222f..c65c003 100644 > --- a/perf_examples/perf_util.h > +++ b/perf_examples/perf_util.h > @@ -36,14 +36,16 @@ typedef struct { > uint64_t id; /* event id kernel */ > void *buf; > size_t pgmsk; > + int group_leader; > int fd; > + int max_fds; > } perf_event_desc_t; > > /* handy shortcut */ > #define PERF_FORMAT_SCALE > (PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING) > > -extern int perf_setup_argv_events(const char **argv, perf_event_desc_t **fd); > -extern int perf_setup_list_events(const char *events, perf_event_desc_t > **fd); > +extern int perf_setup_argv_events(const char **argv, perf_event_desc_t **fd, > int *num_fds); > +extern int perf_setup_list_events(const char *events, perf_event_desc_t > **fd, int *num_fds); > extern int perf_read_buffer(struct perf_event_mmap_page *hdr, size_t pgmsk, > void *buf, size_t sz); > extern void perf_skip_buffer(struct perf_event_mmap_page *hdr, size_t sz); > > @@ -109,4 +111,13 @@ perf_id2event(perf_event_desc_t *fds, int num_events, > uint64_t id) > return j; > return -1; > } > + > +static inline int > +perf_is_group_leader(perf_event_desc_t *fds, int idx) > +{ > + return fds[idx].group_leader == idx; > +} > + > +extern int perf_get_group_nevents(perf_event_desc_t *fds, int num, int > leader); > + > #endif > diff --git a/perf_examples/self.c b/perf_examples/self.c > index 12ebcd4..11e3d85 100644 > --- a/perf_examples/self.c > +++ b/perf_examples/self.c > @@ -65,9 +65,9 @@ noploop(void) > int > main(int argc, char **argv) > { > - perf_event_desc_t *fds; > + perf_event_desc_t *fds = NULL; > uint64_t values[3]; > - int i, ret, num; > + int i, ret, num_fds = 0; > > setlocale(LC_ALL, ""); > /* > @@ -77,12 +77,12 @@ main(int argc, char **argv) > if (ret != PFM_SUCCESS) > errx(1, "Cannot initialize library: %s", pfm_strerror(ret)); > > - num = perf_setup_argv_events(argc > 1 ? (const char **)argv+1 : > gen_events, &fds); > - if (num == -1) > + ret = perf_setup_argv_events(argc > 1 ? (const char **)argv+1 : > gen_events, &fds, &num_fds); > + if (ret || !num_fds) > errx(1, "cannot setup events"); > > fds[0].fd = -1; > - for(i=0; i < num; i++) { > + for(i=0; i < num_fds; i++) { > /* request timing information necessary for scaling */ > fds[i].hw.read_format = PERF_FORMAT_SCALE; > > @@ -97,8 +97,8 @@ main(int argc, char **argv) > signal(SIGALRM, sig_handler); > > /* > - * enable all counters attached to this thread and created by it > - */ > + * enable all counters attached to this thread and created by it > + */ > ret = prctl(PR_TASK_PERF_EVENTS_ENABLE); > if (ret) > err(1, "prctl(enable) failed"); > @@ -108,8 +108,8 @@ main(int argc, char **argv) > noploop(); > > /* > - * disable all counters attached to this thread > - */ > + * disable all counters attached to this thread > + */ > ret = prctl(PR_TASK_PERF_EVENTS_DISABLE); > if (ret) > err(1, "prctl(disable) failed"); > @@ -121,7 +121,7 @@ main(int argc, char **argv) > */ > memset(values, 0, sizeof(values)); > > - for (i=0; i < num; i++) { > + for (i=0; i < num_fds; i++) { > uint64_t val; > double ratio; > > diff --git a/perf_examples/self_count.c b/perf_examples/self_count.c > index 48855fd..c77456d 100644 > --- a/perf_examples/self_count.c > +++ b/perf_examples/self_count.c > @@ -179,10 +179,10 @@ read_count(perf_event_desc_t *fds) > int > main(int argc, char **argv) > { > - perf_event_desc_t *fds; > + perf_event_desc_t *fds = NULL; > size_t pgsz; > uint64_t val; > - int i, ret, num; > + int i, ret, num_fds = 0; > int n = 30; > > pgsz = sysconf(_SC_PAGESIZE); > @@ -193,12 +193,12 @@ main(int argc, char **argv) > if (ret != PFM_SUCCESS) > errx(1, "Cannot initialize library: %s", pfm_strerror(ret)); > > - num = perf_setup_argv_events(argc > 1 ? (const char **)(argv+1) : > gen_events, &fds); > - if (num == -1) > + ret = perf_setup_argv_events(argc > 1 ? (const char **)argv+1 : > gen_events, &fds, &num_fds); > + if (ret || !num_fds) > errx(1, "cannot setup events"); > > fds[0].fd = -1; > - for(i=0; i < num; i++) { > + for(i=0; i < num_fds; i++) { > /* request timing information necesaary for scaling */ > fds[i].hw.read_format = PERF_FORMAT_SCALE; > fds[i].hw.disabled = 0; > @@ -215,15 +215,15 @@ main(int argc, char **argv) > signal(SIGALRM, sig_handler); > > /* > - * enable all counters attached to this thread > - */ > + * enable all counters attached to this thread > + */ > ioctl(fds[0].fd, PERF_EVENT_IOC_ENABLE, 0); > > alarm(10); > > for(;quit == 0;) { > - > - for (i=0; i < num; i++) { > + > + for (i=0; i < num_fds; i++) { > val = read_count(&fds[i]); > printf("%20"PRIu64" %s\n", val, fds[i].name); > } > @@ -233,11 +233,11 @@ main(int argc, char **argv) > n = 30; > } > /* > - * disable all counters attached to this thread > - */ > + * disable all counters attached to this thread > + */ > ioctl(fds[0].fd, PERF_EVENT_IOC_DISABLE, 0); > > - for (i=0; i < num; i++) { > + for (i=0; i < num_fds; i++) { > munmap(fds[i].buf, pgsz); > close(fds[i].fd); > } > diff --git a/perf_examples/self_pipe.c b/perf_examples/self_pipe.c > index 8fb919a..90a90ff 100644 > --- a/perf_examples/self_pipe.c > +++ b/perf_examples/self_pipe.c > @@ -85,8 +85,9 @@ static void > measure(void) > { > perf_event_desc_t *fds = NULL; > + int num_fds = 0; > uint64_t values[3]; > - int i, ret, num; > + int i, ret; > int pr[2], pw[2]; > ssize_t nbytes; > pid_t pid; > @@ -109,11 +110,11 @@ measure(void) > if (ret) > err(1, "cannot create write pipe"); > > - num = perf_setup_list_events(options.events, &fds); > - if (num < 1) > + ret = perf_setup_list_events(options.events, &fds, &num_fds); > + if (ret || !num_fds) > exit(1); > > - for(i=0; i < num; i++) { > + for(i=0; i < num_fds; i++) { > fds[i].hw.disabled = 1; > fds[i].hw.read_format = PERF_FORMAT_SCALE; > > @@ -141,7 +142,7 @@ measure(void) > err(1, "cannot create child\n"); > case 0: > /* do not inherit session fd */ > - for(i=0; i < num; i++) > + for(i=0; i < num_fds; i++) > close(fds[i].fd); > /* pr[]: write master, read child */ > /* pw[]: read master, write child */ > @@ -170,7 +171,7 @@ measure(void) > > prctl(PR_TASK_PERF_EVENTS_DISABLE); > > - for(i=0; i < num; i++) { > + for(i=0; i < num_fds; i++) { > uint64_t val; > double ratio; > > @@ -204,7 +205,7 @@ measure(void) > /* > * and destroy our session > */ > - for(i=0; i < num; i++) > + for(i=0; i < num_fds; i++) > close(fds[i].fd); > > free(fds); > diff --git a/perf_examples/self_smpl_multi.c b/perf_examples/self_smpl_multi.c > index 30d6acb..0ca45f5 100644 > --- a/perf_examples/self_smpl_multi.c > +++ b/perf_examples/self_smpl_multi.c > @@ -77,7 +77,6 @@ static int program_time = PROGRAM_TIME; > static int threshold = THRESHOLD; > static int signum = SIGIO; > static pthread_barrier_t barrier; > -static int num_events; > > > static int buffer_pages = 1; > @@ -120,8 +119,9 @@ long bad_msg[MAX_THR]; > long bad_restart[MAX_THR]; > int fown; > > -int __thread myid; /* TLS */ > +static int __thread myid; /* TLS */ > static perf_event_desc_t __thread *fds; /* TLS */ > +static int __thread num_fds; /* TLS */ > > pid_t > gettid(void) > @@ -277,8 +277,10 @@ overflow_start(char *name) > size_t pgsz; > int ret, fd, flags; > > - num_events = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES", &fds); > - if (num_events != 1) > + fds = NULL; > + num_fds = 0; > + ret = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES", &fds, > &num_fds); > + if (ret || !num_fds) > errx(1, "cannot monitor event"); > > pgsz = sysconf(_SC_PAGESIZE); > diff --git a/perf_examples/syst.c b/perf_examples/syst.c > index 429c6c4..a681e41 100644 > --- a/perf_examples/syst.c > +++ b/perf_examples/syst.c > @@ -43,22 +43,21 @@ typedef struct { > > static options_t options; > static perf_event_desc_t **all_fds; > -static int num; > +static int *num_fds; > > void > setup_cpu(int cpu) > { > - perf_event_desc_t *fds = NULL; > - int i; > + perf_event_desc_t *fds; > + int i, ret; > > - num = perf_setup_list_events(options.events, &fds); > - if (num == -1) > + ret = perf_setup_list_events(options.events, &all_fds[cpu], > &num_fds[cpu]); > + if (ret || (num_fds == 0)) > errx(1, "cannot setup events\n"); > - > - all_fds[cpu] = fds; > + fds = all_fds[cpu]; /* temp */ > > fds[0].fd = -1; > - for(i=0; i < num; i++) { > + for(i=0; i < num_fds[cpu]; i++) { > fds[i].hw.disabled = options.group ? !i : 1; > > if (options.excl && ((options.group && !i) || (!options.group))) > @@ -92,10 +91,11 @@ measure(void) > cmax = cmin + 1; > ncpus = 1; > } > - all_fds = malloc(ncpus * sizeof(perf_event_desc_t)); > - if (!all_fds) > - err(1, "cannot allocate memory for all_fds"); > + all_fds = calloc(ncpus, sizeof(perf_event_desc_t)); > + num_fds = calloc(ncpus, sizeof(int)); > > + if (!all_fds || !num_fds) > + err(1, "cannot allocate memory for internal structures"); > for(c=cmin ; c < cmax; c++) > setup_cpu(c); > > @@ -106,7 +106,7 @@ measure(void) > fds = all_fds[c]; > if (options.group) > ret = ioctl(fds[0].fd, PERF_EVENT_IOC_ENABLE, 0); > - else for(i=0; i < num; i++) { > + else for(i=0; i < num_fds[c]; i++) { > ret = ioctl(fds[i].fd, PERF_EVENT_IOC_ENABLE, 0); > if (ret) > err(1, "cannot enable event %s\n", fds[i].name); > @@ -121,7 +121,7 @@ measure(void) > puts("------------------------"); > for(c = cmin; c < cmax; c++) { > fds = all_fds[c]; > - for(i=0; i < num; i++) { > + for(i=0; i < num_fds[c]; i++) { > double ratio; > > ret = read(fds[i].fd, values, sizeof(values)); > @@ -152,7 +152,7 @@ measure(void) > } > for(c = cmin; c < cmax; c++) { > fds = all_fds[c]; > - for(i=0; i < num; i++) > + for(i=0; i < num_fds[c]; i++) > close(fds[i].fd); > } > free(all_fds); > diff --git a/perf_examples/task.c b/perf_examples/task.c > index b29822a..6c37b84 100644 > --- a/perf_examples/task.c > +++ b/perf_examples/task.c > @@ -35,10 +35,13 @@ > > #include "perf_util.h" > > +#define MAX_GROUPS 16 > + > typedef struct { > - const char *events; > + const char *events[MAX_GROUPS]; > + int num_groups; > + int format_group; > int inherit; > - int group; > int print; > int pin; > pid_t pid; > @@ -59,11 +62,11 @@ child(char **arg) > } > > static void > -read_group(perf_event_desc_t *fds, int num) > +read_groups(perf_event_desc_t *fds, int num) > { > - uint64_t *values; > - size_t sz; > - int i, ret; > + uint64_t *values = NULL; > + size_t new_sz, sz = 0; > + int i, evt, ret; > > /* > * { u64 nr; > @@ -76,56 +79,55 @@ read_group(perf_event_desc_t *fds, int num) > * > * we do not use FORMAT_ID in this program > */ > - sz = sizeof(uint64_t) * (3 + num); > - values = malloc(sz); > - if (!values) > - err(1, "cannot allocate memory for values\n"); > - > - ret = read(fds[0].fd, values, sz); > - if (ret != sz) { /* unsigned */ > - if (ret == -1) > - err(1, "cannot read values event %s", fds[0].name); > - else /* likely pinned and could not be loaded */ > - warnx("could not read event0 ret=%d", ret); > - } > > - /* > - * propagate to save area > - */ > - for(i=0; i < num; i++) { > - values[0] = values[3+i]; > - /* > - * scaling because we may be sharing the PMU and > - * thus may be multiplexed > - */ > - fds[i].prev_value = fds[i].value; > - fds[i].value = perf_scale(values); > - fds[i].enabled = values[1]; > - fds[i].running = values[2]; > - } > - free(values); > -} > + for (evt = 0; evt < num; ) { > + int num_evts_to_read; > > -static void > -read_single(perf_event_desc_t *fds, int num) > -{ > - uint64_t values[3]; > - int i, ret; > + if (options.format_group) { > + num_evts_to_read = perf_get_group_nevents(fds, num, > evt); > + new_sz = sizeof(uint64_t) * (3 + num_evts_to_read); > + } else { > + num_evts_to_read = 1; > + new_sz = sizeof(uint64_t) * 3; > + } > > - for(i=0; i < num; i++) { > + if (new_sz > sz) { > + sz = new_sz; > + values = realloc(values, sz); > + } > > - ret = read(fds[i].fd, values, sizeof(values)); > - if (ret != sizeof(values)) { /* unsigned */ > + if (!values) > + err(1, "cannot allocate memory for values\n"); > + > + ret = read(fds[evt].fd, values, sz); > + if (ret != sz) { /* unsigned */ > if (ret == -1) > - err(1, "cannot read values event %s", > fds[i].name); > - else /* likely pinned and could not be loaded */ > - warnx("could not read event%d", i); > + err(1, "cannot read values event %s", > fds[0].name); > + > + /* likely pinned and could not be loaded */ > + warnx("could not read event %d, tried to read %d bytes, > but got %d", > + evt, (int)sz, ret); > + } > + > + /* > + * propagate to save area > + */ > + for (i = evt; i < (evt + num_evts_to_read); i++) { > + if (options.format_group) > + values[0] = values[3 + (i - evt)]; > + /* > + * scaling because we may be sharing the PMU and > + * thus may be multiplexed > + */ > + fds[i].prev_value = fds[i].value; > + fds[i].value = perf_scale(values); > + fds[i].enabled = values[1]; > + fds[i].running = values[2]; > } > - fds[i].prev_value = fds[i].value; > - fds[i].value = perf_scale(values); > - fds[i].enabled = values[1]; > - fds[i].running = values[2]; > + evt += num_evts_to_read; > } > + if (values) > + free(values); > } > > static void > @@ -133,21 +135,21 @@ print_counts(perf_event_desc_t *fds, int num) > { > int i; > > - if (options.group) > - read_group(fds, num); > - else > - read_single(fds, num); > + read_groups(fds, num); > > for(i=0; i < num; i++) { > double ratio; > uint64_t val; > > val = fds[i].value - fds[i].prev_value; > - > ratio = 0.0; > if (fds[i].enabled) > ratio = 1.0 * fds[i].running / fds[i].enabled; > > + /* separate groups */ > + if (i && fds[i].hw.enable_on_exec) > + putchar('\n'); > + > if (ratio == 1.0) > printf("%'20"PRIu64" %s (%'"PRIu64" : %'"PRIu64")\n", > val, fds[i].name, fds[i].enabled, fds[i].running); > else > @@ -167,8 +169,8 @@ static void sig_handler(int n) > int > parent(char **arg) > { > - perf_event_desc_t *fds; > - int status, ret, i, num; > + perf_event_desc_t *fds = NULL; > + int status, ret, i, num_fds = 0, grp, group_fd; > int ready[2], go[2]; > char buf; > pid_t pid; > @@ -176,9 +178,12 @@ parent(char **arg) > if (pfm_initialize() != PFM_SUCCESS) > errx(1, "libpfm initialization failed"); > > - num = perf_setup_list_events(options.events, &fds); > - if (num < 1) > - exit(1); > + for (grp = 0; grp < options.num_groups; grp++) { > + int ret; > + ret = perf_setup_list_events(options.events[grp], &fds, > &num_fds); > + if (ret || !num_fds) > + exit(1); > + } > > pid = options.pid; > if (!pid) { > @@ -232,37 +237,43 @@ parent(char **arg) > } > > fds[0].fd = -1; > - for(i=0; i < num; i++) { > + for(i=0; i < num_fds; i++) { > + int is_group_leader; /* boolean */ > + > + is_group_leader = perf_is_group_leader(fds, i); > + if (is_group_leader) { > + /* this is the group leader */ > + group_fd = -1; > + } else { > + group_fd = fds[fds[i].group_leader].fd; > + } > + > /* > * create leader disabled with enable_on-exec > */ > if (!options.pid) { > - if (options.group) { > - fds[i].hw.disabled = !i; > - fds[i].hw.enable_on_exec = !i; > - } else { > - fds[i].hw.disabled = 1; > - fds[i].hw.enable_on_exec = 1; > - } > + fds[i].hw.disabled = is_group_leader; > + fds[i].hw.enable_on_exec = is_group_leader; > } > > fds[i].hw.read_format = PERF_FORMAT_SCALE; > /* request timing information necessary for scaling counts */ > - if (!i && options.group) > - fds[0].hw.read_format |= PERF_FORMAT_GROUP; > + if (is_group_leader && options.format_group) > + fds[i].hw.read_format |= PERF_FORMAT_GROUP; > > if (options.inherit) > fds[i].hw.inherit = 1; > > - if (options.pin && ((options.group && i== 0) || > (!options.group))) > + if (options.pin && is_group_leader) > fds[i].hw.pinned = 1; > - > - fds[i].fd = perf_event_open(&fds[i].hw, pid, -1, options.group > ? fds[0].fd : -1, 0); > + fds[i].fd = perf_event_open(&fds[i].hw, pid, -1, group_fd, 0); > if (fds[i].fd == -1) { > warn("cannot attach event%d %s", i, fds[i].name); > goto error; > } > - } > + } > + ioctl(fds[0].fd, PERF_EVENT_IOC_DISABLE, 0); > + > > if (!options.pid) > close(go[1]); > @@ -271,12 +282,12 @@ parent(char **arg) > if (!options.pid) { > while(waitpid(pid, &status, WNOHANG) == 0) { > sleep(1); > - print_counts(fds, num); > + print_counts(fds, num_fds); > } > } else { > while(quit == 0) { > sleep(1); > - print_counts(fds, num); > + print_counts(fds, num_fds); > } > } > } else { > @@ -284,10 +295,10 @@ parent(char **arg) > waitpid(pid, &status, 0); > else > pause(); > - print_counts(fds, num); > + print_counts(fds, num_fds); > } > > - for(i=0; i < num; i++) > + for(i=0; i < num_fds; i++) > close(fds[i].fd); > > free(fds); > @@ -305,11 +316,11 @@ usage(void) > printf("usage: task [-h] [-i] [-g] [-p] [-P] [-t pid] [-e > event1,event2,...] cmd\n" > "-h\t\tget help\n" > "-i\t\tinherit across fork\n" > - "-g\t\tgroup events\n" > + "-f\t\tuse PERF_FORMAT_GROUP for reading up counts > (experimental, not working)\n" > "-p\t\tprint counts every second\n" > "-P\t\tpin events\n" > "-t pid\tmeasure existing pid\n" > - "-e ev,ev\tlist of events to measure\n" > + "-e ev,ev\tgroup of events to measure (multiple -e switches are > allowed)\n" > ); > } > > @@ -320,13 +331,18 @@ main(int argc, char **argv) > > setlocale(LC_ALL, ""); > > - while ((c=getopt(argc, argv,"he:igpPt:")) != -1) { > + while ((c=getopt(argc, argv,"he:ifpPt:")) != -1) { > switch(c) { > case 'e': > - options.events = optarg; > + if (options.num_groups < MAX_GROUPS) { > + options.events[options.num_groups++] = > optarg; > + } else { > + errx(1, "you cannot specify more than > %d groups.\n", > + MAX_GROUPS); > + } > break; > - case 'g': > - options.group = 1; > + case 'f': > + options.format_group = 1; > break; > case 'p': > options.print = 1; > @@ -347,12 +363,13 @@ main(int argc, char **argv) > errx(1, "unknown error"); > } > } > - if (!options.events) > - options.events = > "PERF_COUNT_HW_CPU_CYCLES,PERF_COUNT_HW_INSTRUCTIONS"; > - > + if (options.num_groups == 0) { > + options.events[0] = > "PERF_COUNT_HW_CPU_CYCLES,PERF_COUNT_HW_INSTRUCTIONS"; > + options.num_groups = 1; > + } > if (!argv[optind] && !options.pid) > errx(1, "you must specify a command to execute or a thread to > attach to\n"); > - > + > signal(SIGINT, sig_handler); > > return parent(argv+optind); > diff --git a/perf_examples/task_attach_timeout.c > b/perf_examples/task_attach_timeout.c > index bfcb60d..a3bffb0 100644 > --- a/perf_examples/task_attach_timeout.c > +++ b/perf_examples/task_attach_timeout.c > @@ -95,19 +95,19 @@ print_counts(perf_event_desc_t *fds, int num, int > do_delta) > int > measure(pid_t pid) > { > - perf_event_desc_t *fds; > - int i, num; > + perf_event_desc_t *fds = NULL; > + int i, ret, num_fds = 0; > char fn[32]; > > if (pfm_initialize() != PFM_SUCCESS) > errx(1, "libpfm initialization failed\n"); > > - num = perf_setup_list_events(options.events, &fds); > - if (num < 1) > + ret = perf_setup_list_events(options.events, &fds, &num_fds); > + if (ret || (num_fds == 0)) > exit(1); > > fds[0].fd = -1; > - for(i=0; i < num; i++) { > + for(i=0; i < num_fds; i++) { > fds[i].hw.disabled = 0; /* start immediately */ > > /* request timing information necessary for scaling counts */ > @@ -130,15 +130,15 @@ measure(pid_t pid) > sleep(1); > options.delay--; > if (options.print) > - print_counts(fds, num, 1); > + print_counts(fds, num_fds, 1); > } > if (options.delay) > warn("thread %d terminated before timeout", pid); > > if (!options.print) > - print_counts(fds, num, 0); > + print_counts(fds, num_fds, 0); > > - for(i=0; i < num; i++) > + for(i=0; i < num_fds; i++) > close(fds[i].fd); > > free(fds); > diff --git a/perf_examples/task_smpl.c b/perf_examples/task_smpl.c > index 7d029db..2ceeb01 100644 > --- a/perf_examples/task_smpl.c > +++ b/perf_examples/task_smpl.c > @@ -58,7 +58,7 @@ typedef struct { > static jmp_buf jbuf; > static uint64_t collected_samples, lost_samples; > static perf_event_desc_t *fds; > -static int num_events; > +static int num_fds; > static options_t options; > static uint64_t sum_period; > > @@ -293,7 +293,7 @@ display_sample(perf_event_desc_t *hw, struct > perf_event_header *ehdr) > > sz -= sizeof(grp); > > - e = perf_id2event(fds, num_events, grp.id); > + e = perf_id2event(fds, num_fds, grp.id); > if (e == -1) > str = "unknown sample event"; > else > @@ -395,7 +395,7 @@ display_lost(perf_event_desc_t *hw) > if (ret) > errx(1, "cannot read lost info"); > > - e = perf_id2event(fds, num_events, lost.id); > + e = perf_id2event(fds, num_fds, lost.id); > if (e == -1) > str = "unknown lost event"; > else > @@ -486,8 +486,8 @@ mainloop(char **arg) > /* > * does allocate fds > */ > - num_events = perf_setup_list_events(options.events, &fds); > - if (num_events == -1) > + ret = perf_setup_list_events(options.events, &fds, &num_fds); > + if (ret || !num_fds) > errx(1, "cannot setup event list"); > > memset(pollfds, 0, sizeof(pollfds)); > @@ -512,7 +512,7 @@ mainloop(char **arg) > errx(1, "task %s [%d] exited already status %d\n", arg[0], pid, > WEXITSTATUS(status)); > > fds[0].fd = -1; > - for(i=0; i < num_events; i++) { > + for(i=0; i < num_fds; i++) { > > fds[i].hw.disabled = 0; /* start immediately */ > > @@ -538,7 +538,7 @@ mainloop(char **arg) > > /* must get event id for SAMPLE_GROUP */ > fds[i].hw.read_format = PERF_FORMAT_SCALE; > - if (num_events > 1) > + if (num_fds > 1) > fds[i].hw.read_format |= > PERF_FORMAT_GROUP|PERF_FORMAT_ID; > } > > @@ -573,7 +573,7 @@ mainloop(char **arg) > * We are skipping the first 3 values (nr, time_enabled, time_running) > * and then for each event we get a pair of values. > */ > - sz = (3+2*num_events)*sizeof(uint64_t); > + sz = (3+2*num_fds)*sizeof(uint64_t); > val = malloc(sz); > if (!val) > err(1, "cannot allocate memory"); > @@ -583,7 +583,7 @@ mainloop(char **arg) > err(1, "cannot read id %zu", sizeof(val)); > > > - for(i=0; i < num_events; i++) { > + for(i=0; i < num_fds; i++) { > fds[i].id = val[2*i+1+3]; > printf("%"PRIu64" %s\n", fds[i].id, fds[i].name); > } > @@ -617,7 +617,7 @@ terminate_session: > */ > wait4(pid, &status, 0, NULL); > > - for(i=0; i < num_events; i++) > + for(i=0; i < num_fds; i++) > close(fds[i].fd); > > /* check for partial event buffer */ > diff --git a/perf_examples/x86/bts_smpl.c b/perf_examples/x86/bts_smpl.c > index 46237bd..b28fd06 100644 > --- a/perf_examples/x86/bts_smpl.c > +++ b/perf_examples/x86/bts_smpl.c > @@ -59,7 +59,7 @@ typedef struct { > static jmp_buf jbuf; > static uint64_t collected_samples, lost_samples; > static perf_event_desc_t *fds; > -static int num_events; > +static int num_fds; > static options_t options; > > static struct option the_options[]={ > @@ -172,7 +172,7 @@ display_lost(perf_event_desc_t *hw) > if (ret) > errx(1, "cannot read lost info"); > > - e = perf_id2event(fds, num_events, lost.id); > + e = perf_id2event(fds, num_fds, lost.id); > if (e == -1) > str = "unknown lost event"; > else > @@ -239,8 +239,8 @@ mainloop(char **arg) > /* > * does allocate fds > */ > - num_events = > perf_setup_list_events("PERF_COUNT_HW_BRANCH_INSTRUCTIONS", &fds); > - if (num_events == -1) > + ret = perf_setup_list_events("PERF_COUNT_HW_BRANCH_INSTRUCTIONS", &fds, > &num_fds); > + if (ret || !num_fds) > errx(1, "cannot setup event"); > > memset(pollfds, 0, sizeof(pollfds)); > diff --git a/perf_examples/x86/pebs_smpl.c b/perf_examples/x86/pebs_smpl.c > index 83ba098..f24aa44 100644 > --- a/perf_examples/x86/pebs_smpl.c > +++ b/perf_examples/x86/pebs_smpl.c > @@ -54,7 +54,7 @@ typedef struct { > static jmp_buf jbuf; > static uint64_t collected_samples, lost_samples; > static perf_event_desc_t *fds; > -static int num_events; > +static int num_fds; > static options_t options; > static uint64_t sum_period; > > @@ -353,7 +353,7 @@ display_sample(perf_event_desc_t *hw, struct > perf_event_header *ehdr) > > sz -= sizeof(grp); > > - e = perf_id2event(fds, num_events, grp.id); > + e = perf_id2event(fds, num_fds, grp.id); > if (e == -1) > str = "unknown sample event"; > else > @@ -419,7 +419,7 @@ display_lost(perf_event_desc_t *hw) > if (ret) > errx(1, "cannot read lost info"); > > - e = perf_id2event(fds, num_events, lost.id); > + e = perf_id2event(fds, num_fds, lost.id); > if (e == -1) > str = "unknown lost event"; > else > @@ -510,8 +510,8 @@ mainloop(char **arg) > /* > * does allocate fds > */ > - num_events = perf_setup_list_events(options.events, &fds); > - if (num_events == -1) > + ret = perf_setup_list_events(options.events, &fds, &num_fds); > + if (ret || !num_fds) > errx(1, "cannot setup event list"); > > memset(pollfds, 0, sizeof(pollfds)); > @@ -536,7 +536,7 @@ mainloop(char **arg) > errx(1, "task %s [%d] exited already status %d\n", arg[0], pid, > WEXITSTATUS(status)); > > fds[0].fd = -1; > - for(i=0; i < num_events; i++) { > + for(i=0; i < num_fds; i++) { > > fds[i].hw.disabled = 0; /* start immediately */ > > @@ -595,7 +595,7 @@ mainloop(char **arg) > * We are skipping the first 3 values (nr, time_enabled, time_running) > * and then for each event we get a pair of values. > */ > - sz = (3+2*num_events)*sizeof(uint64_t); > + sz = (3+2*num_fds)*sizeof(uint64_t); > val = malloc(sz); > if (!val) > err(1, "cannot allocated memory"); > @@ -605,7 +605,7 @@ mainloop(char **arg) > err(1, "cannot read id %zu", sizeof(val)); > > > - for(i=0; i < num_events; i++) { > + for(i=0; i < num_fds; i++) { > fds[i].id = val[2*i+1+3]; > printf("%"PRIu64" %s\n", fds[i].id, fds[i].name); > } > @@ -639,7 +639,7 @@ terminate_session: > */ > wait4(pid, &status, 0, NULL); > > - for(i=0; i < num_events; i++) > + for(i=0; i < num_fds; i++) > close(fds[i].fd); > > /* check for partial event buffer */ -- Regards, - Corey Corey Ashford Software Engineer IBM Linux Technology Center, Linux Toolchain Beaverton, OR 503-578-3507 cjash...@us.ibm.com ------------------------------------------------------------------------------ _______________________________________________ perfmon2-devel mailing list perfmon2-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/perfmon2-devel