Corey, I reworked your patch a little bit. I promoted certain helper functions to perf_util.h.
I did not really like the extra max_fds parameter to the perf_setup_*(). I thought it was too much to carry around. Instead, I stash this value into the fds[] first entry. I kept num_fds as a parameter, this way the return value is really an error code. I also updated the patch to the latest HEAD. I also updated the x86 specific examples. Let me know what you think. Thanks. diff --git a/perf_examples/notify_group.c b/perf_examples/notify_group.c index 9192df2..598be72 100644 --- a/perf_examples/notify_group.c +++ b/perf_examples/notify_group.c @@ -44,7 +44,7 @@ typedef struct { static volatile unsigned long notification_received; static perf_event_desc_t *fds; -static int num_events; +static int num_fds; static int buffer_pages = 1; /* size of buffer payload (must be power of 2) */ @@ -56,7 +56,7 @@ sigio_handler(int n, struct siginfo *info, struct sigcontext *sc) uint64_t ip; int id, ret; - id = perf_fd2event(fds, num_events, info->si_fd); + id = perf_fd2event(fds, num_fds, info->si_fd); if (id == -1) errx(1, "cannot find event for descriptor %d", info->si_fd); @@ -129,15 +129,15 @@ main(int argc, char **argv) /* * allocates fd for us */ - num_events = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES," + ret = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES," "PERF_COUNT_HW_CPU_CYCLES," "PERF_COUNT_HW_CPU_CYCLES", - &fds); - if (num_events < 1) + &fds, &num_fds); + if (ret || !num_fds) exit(1); fds[0].fd = -1; - for(i=0; i < num_events; i++) { + for(i=0; i < num_fds; i++) { /* want a notification for each sample added to the buffer */ fds[i].hw.disabled = !!i; printf("i=%d disabled=%d\n", i, fds[i].hw.disabled); @@ -180,7 +180,7 @@ main(int argc, char **argv) fds[i].pgmsk = (buffer_pages * pgsz) - 1; } - for(i=0; i < num_events; i++) { + for(i=0; i < num_fds; i++) { ret = ioctl(fds[i].fd, PERF_EVENT_IOC_REFRESH , 1); if (ret == -1) err(1, "cannot refresh"); @@ -194,7 +194,7 @@ error: /* * destroy our session */ - for(i=0; i < num_events; i++) + for(i=0; i < num_fds; i++) close(fds[i].fd); free(fds); diff --git a/perf_examples/notify_self.c b/perf_examples/notify_self.c index 12dcfc9..ceee2cd 100644 --- a/perf_examples/notify_self.c +++ b/perf_examples/notify_self.c @@ -40,8 +40,8 @@ static volatile unsigned long notification_received; -static perf_event_desc_t *fds; -static int num_events; +static perf_event_desc_t *fds = NULL; +static int num_fds = 0; static int buffer_pages = 1; /* size of buffer payload (must be power of 2)*/ @@ -79,7 +79,7 @@ print_sample(int id) if (ret) errx(1, "cannot read grp"); - e = perf_id2event(fds, num_events, grp.id); + e = perf_id2event(fds, num_fds, grp.id); if (e == -1) str = "unknown event"; else @@ -119,7 +119,7 @@ sigio_handler(int n, struct siginfo *info, void *uc) if (info->si_code != POLL_HUP) errx(1, "signal not generated by SIGIO"); - id = perf_fd2event(fds, num_events, info->si_fd); + id = perf_fd2event(fds, num_fds, info->si_fd); if (id == -1) errx(1, "no event associated with fd=%d", info->si_fd); @@ -188,14 +188,14 @@ main(int argc, char **argv) /* * allocates fd for us */ - num_events = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES," + ret = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES," "PERF_COUNT_HW_INSTRUCTIONS", - &fds); - if (num_events < 1) + &fds, &num_fds); + if (ret || (num_fds == 0)) exit(1); fds[0].fd = -1; - for(i=0; i < num_events; i++) { + for(i=0; i < num_fds; i++) { /* want a notification for every each added to the buffer */ fds[i].hw.disabled = !i; @@ -213,7 +213,7 @@ main(int argc, char **argv) err(1, "cannot attach event %s", fds[i].name); } - sz = (3+2*num_events)*sizeof(uint64_t); + sz = (3+2*num_fds)*sizeof(uint64_t); val = malloc(sz); if (!val) err(1, "cannot allocated memory"); @@ -245,7 +245,7 @@ main(int argc, char **argv) * We are skipping the first 3 values (nr, time_enabled, time_running) * and then for each event we get a pair of values. */ - for(i=0; i < num_events; i++) { + for(i=0; i < num_fds; i++) { fds[i].id = val[2*i+1+3]; printf("%"PRIu64" %s\n", fds[i].id, fds[i].name); } @@ -295,7 +295,7 @@ main(int argc, char **argv) /* * destroy our session */ - for(i=0; i < num_events; i++) + for(i=0; i < num_fds; i++) close(fds[i].fd); free(fds); diff --git a/perf_examples/perf_util.c b/perf_examples/perf_util.c index 4afc978..9c99b15 100644 --- a/perf_examples/perf_util.c +++ b/perf_examples/perf_util.c @@ -31,69 +31,86 @@ #include <perfmon/pfmlib_perf_event.h> #include "perf_util.h" +/* the **fd parameter must point to a null pointer on the first call + * max_fds and num_fds must both point to a zero value on the first call + * The return value is success (0) vs. failure (non-zero) + */ int -perf_setup_argv_events(const char **argv, perf_event_desc_t **fd) +perf_setup_argv_events(const char **argv, perf_event_desc_t **fds, int *num_fds) { - perf_event_desc_t *fdt = NULL; - int num = 0, max_fd = 0, new_max; - int ret; + perf_event_desc_t *fd; + int new_max, ret, num, max_fds; + int group_leader; - if (!(argv && fd)) + if (!(argv && fds && num_fds)) return -1; - - while(*argv) { - if (num == max_fd) { + fd = *fds; + if (fd) { + max_fds = fd[0].max_fds; + if (max_fds < 2) + return -1; + num = *num_fds; + } else { + max_fds = num = 0; /* bootstrap */ + } + group_leader = num; - if (!max_fd) + while(*argv) { + if (num == max_fds) { + if (max_fds == 0) new_max = 2; else - new_max = max_fd << 1; + new_max = max_fds << 1; - if (new_max < max_fd) { + if (new_max < max_fds) { warn("too many entries"); goto error; } - fdt = realloc(fdt, new_max * sizeof(*fdt)); - if (!fdt) { + fd = realloc(fd, new_max * sizeof(*fd)); + if (!fd) { warn("cannot allocate memory"); goto error; } /* reset newly allocated chunk */ - memset(fdt+max_fd, 0, (new_max - max_fd) * sizeof(*fdt)); - max_fd = new_max; + memset(fd + max_fds, 0, (new_max - max_fds) * sizeof(*fd)); + max_fds = new_max; + + /* update max size */ + fd[0].max_fds = max_fds; } - ret = pfm_get_perf_event_encoding(*argv, PFM_PLM3, &fdt[num].hw, NULL, NULL); + ret = pfm_get_perf_event_encoding(*argv, PFM_PLM3, &fd[num].hw, NULL, NULL); if (ret != PFM_SUCCESS) { warnx("event %s: %s\n", *argv, pfm_strerror(ret)); goto error; } /* ABI compatibility */ - fdt[num].hw.size = sizeof(struct perf_event_attr); - fdt[num].name = *argv; + fd[num].hw.size = sizeof(struct perf_event_attr); + + fd[num].name = *argv; + fd[num].group_leader = group_leader; num++; argv++; } - *fd = fdt; - return num; + *num_fds = num; + *fds = fd; + return 0; error: - free(fdt); + free(fd); return -1; } int -perf_setup_list_events(const char *ev, perf_event_desc_t **fd) +perf_setup_list_events(const char *ev, perf_event_desc_t **fd, int *num_fds) { const char **argv; - char *p, *q; - char *events; - int num = 0, i, ret; + char *p, *q, *events; + int i, ret, num = 0; - if (!(ev && fd)) + if (!(ev && fd && num_fds)) return -1; - events = strdup(ev); if (!events) return -1; @@ -105,7 +122,7 @@ perf_setup_list_events(const char *ev, perf_event_desc_t **fd) } num++; num++; /* terminator */ - + argv = malloc(num * sizeof(char *)); if (!argv) { free(events); @@ -119,12 +136,35 @@ perf_setup_list_events(const char *ev, perf_event_desc_t **fd) } argv[i++] = q; argv[i] = NULL; - ret = perf_setup_argv_events(argv, fd); + ret = perf_setup_argv_events(argv, fd, num_fds); free(argv); return ret; } int +perf_get_group_nevents(perf_event_desc_t *fds, int num, int idx) +{ + int leader; + int i; + + if (idx < 0 || idx >= num) + return 0; + + leader = fds[idx].group_leader; + + for (i = leader + 1; i < num; i++) { + if (fds[i].group_leader != leader) { + /* This is a new group leader, so the previous + * event was the final event of the preceding + * group. + */ + return i - leader; + } + } + return 1; +} + +int perf_read_buffer(struct perf_event_mmap_page *hdr, size_t pgmsk, void *buf, size_t sz) { char *data; diff --git a/perf_examples/perf_util.h b/perf_examples/perf_util.h index 8f5222f..c65c003 100644 --- a/perf_examples/perf_util.h +++ b/perf_examples/perf_util.h @@ -36,14 +36,16 @@ typedef struct { uint64_t id; /* event id kernel */ void *buf; size_t pgmsk; + int group_leader; int fd; + int max_fds; } perf_event_desc_t; /* handy shortcut */ #define PERF_FORMAT_SCALE (PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING) -extern int perf_setup_argv_events(const char **argv, perf_event_desc_t **fd); -extern int perf_setup_list_events(const char *events, perf_event_desc_t **fd); +extern int perf_setup_argv_events(const char **argv, perf_event_desc_t **fd, int *num_fds); +extern int perf_setup_list_events(const char *events, perf_event_desc_t **fd, int *num_fds); extern int perf_read_buffer(struct perf_event_mmap_page *hdr, size_t pgmsk, void *buf, size_t sz); extern void perf_skip_buffer(struct perf_event_mmap_page *hdr, size_t sz); @@ -109,4 +111,13 @@ perf_id2event(perf_event_desc_t *fds, int num_events, uint64_t id) return j; return -1; } + +static inline int +perf_is_group_leader(perf_event_desc_t *fds, int idx) +{ + return fds[idx].group_leader == idx; +} + +extern int perf_get_group_nevents(perf_event_desc_t *fds, int num, int leader); + #endif diff --git a/perf_examples/self.c b/perf_examples/self.c index 12ebcd4..11e3d85 100644 --- a/perf_examples/self.c +++ b/perf_examples/self.c @@ -65,9 +65,9 @@ noploop(void) int main(int argc, char **argv) { - perf_event_desc_t *fds; + perf_event_desc_t *fds = NULL; uint64_t values[3]; - int i, ret, num; + int i, ret, num_fds = 0; setlocale(LC_ALL, ""); /* @@ -77,12 +77,12 @@ main(int argc, char **argv) if (ret != PFM_SUCCESS) errx(1, "Cannot initialize library: %s", pfm_strerror(ret)); - num = perf_setup_argv_events(argc > 1 ? (const char **)argv+1 : gen_events, &fds); - if (num == -1) + ret = perf_setup_argv_events(argc > 1 ? (const char **)argv+1 : gen_events, &fds, &num_fds); + if (ret || !num_fds) errx(1, "cannot setup events"); fds[0].fd = -1; - for(i=0; i < num; i++) { + for(i=0; i < num_fds; i++) { /* request timing information necessary for scaling */ fds[i].hw.read_format = PERF_FORMAT_SCALE; @@ -97,8 +97,8 @@ main(int argc, char **argv) signal(SIGALRM, sig_handler); /* - * enable all counters attached to this thread and created by it - */ + * enable all counters attached to this thread and created by it + */ ret = prctl(PR_TASK_PERF_EVENTS_ENABLE); if (ret) err(1, "prctl(enable) failed"); @@ -108,8 +108,8 @@ main(int argc, char **argv) noploop(); /* - * disable all counters attached to this thread - */ + * disable all counters attached to this thread + */ ret = prctl(PR_TASK_PERF_EVENTS_DISABLE); if (ret) err(1, "prctl(disable) failed"); @@ -121,7 +121,7 @@ main(int argc, char **argv) */ memset(values, 0, sizeof(values)); - for (i=0; i < num; i++) { + for (i=0; i < num_fds; i++) { uint64_t val; double ratio; diff --git a/perf_examples/self_count.c b/perf_examples/self_count.c index 48855fd..c77456d 100644 --- a/perf_examples/self_count.c +++ b/perf_examples/self_count.c @@ -179,10 +179,10 @@ read_count(perf_event_desc_t *fds) int main(int argc, char **argv) { - perf_event_desc_t *fds; + perf_event_desc_t *fds = NULL; size_t pgsz; uint64_t val; - int i, ret, num; + int i, ret, num_fds = 0; int n = 30; pgsz = sysconf(_SC_PAGESIZE); @@ -193,12 +193,12 @@ main(int argc, char **argv) if (ret != PFM_SUCCESS) errx(1, "Cannot initialize library: %s", pfm_strerror(ret)); - num = perf_setup_argv_events(argc > 1 ? (const char **)(argv+1) : gen_events, &fds); - if (num == -1) + ret = perf_setup_argv_events(argc > 1 ? (const char **)argv+1 : gen_events, &fds, &num_fds); + if (ret || !num_fds) errx(1, "cannot setup events"); fds[0].fd = -1; - for(i=0; i < num; i++) { + for(i=0; i < num_fds; i++) { /* request timing information necesaary for scaling */ fds[i].hw.read_format = PERF_FORMAT_SCALE; fds[i].hw.disabled = 0; @@ -215,15 +215,15 @@ main(int argc, char **argv) signal(SIGALRM, sig_handler); /* - * enable all counters attached to this thread - */ + * enable all counters attached to this thread + */ ioctl(fds[0].fd, PERF_EVENT_IOC_ENABLE, 0); alarm(10); for(;quit == 0;) { - - for (i=0; i < num; i++) { + + for (i=0; i < num_fds; i++) { val = read_count(&fds[i]); printf("%20"PRIu64" %s\n", val, fds[i].name); } @@ -233,11 +233,11 @@ main(int argc, char **argv) n = 30; } /* - * disable all counters attached to this thread - */ + * disable all counters attached to this thread + */ ioctl(fds[0].fd, PERF_EVENT_IOC_DISABLE, 0); - for (i=0; i < num; i++) { + for (i=0; i < num_fds; i++) { munmap(fds[i].buf, pgsz); close(fds[i].fd); } diff --git a/perf_examples/self_pipe.c b/perf_examples/self_pipe.c index 8fb919a..90a90ff 100644 --- a/perf_examples/self_pipe.c +++ b/perf_examples/self_pipe.c @@ -85,8 +85,9 @@ static void measure(void) { perf_event_desc_t *fds = NULL; + int num_fds = 0; uint64_t values[3]; - int i, ret, num; + int i, ret; int pr[2], pw[2]; ssize_t nbytes; pid_t pid; @@ -109,11 +110,11 @@ measure(void) if (ret) err(1, "cannot create write pipe"); - num = perf_setup_list_events(options.events, &fds); - if (num < 1) + ret = perf_setup_list_events(options.events, &fds, &num_fds); + if (ret || !num_fds) exit(1); - for(i=0; i < num; i++) { + for(i=0; i < num_fds; i++) { fds[i].hw.disabled = 1; fds[i].hw.read_format = PERF_FORMAT_SCALE; @@ -141,7 +142,7 @@ measure(void) err(1, "cannot create child\n"); case 0: /* do not inherit session fd */ - for(i=0; i < num; i++) + for(i=0; i < num_fds; i++) close(fds[i].fd); /* pr[]: write master, read child */ /* pw[]: read master, write child */ @@ -170,7 +171,7 @@ measure(void) prctl(PR_TASK_PERF_EVENTS_DISABLE); - for(i=0; i < num; i++) { + for(i=0; i < num_fds; i++) { uint64_t val; double ratio; @@ -204,7 +205,7 @@ measure(void) /* * and destroy our session */ - for(i=0; i < num; i++) + for(i=0; i < num_fds; i++) close(fds[i].fd); free(fds); diff --git a/perf_examples/self_smpl_multi.c b/perf_examples/self_smpl_multi.c index 30d6acb..0ca45f5 100644 --- a/perf_examples/self_smpl_multi.c +++ b/perf_examples/self_smpl_multi.c @@ -77,7 +77,6 @@ static int program_time = PROGRAM_TIME; static int threshold = THRESHOLD; static int signum = SIGIO; static pthread_barrier_t barrier; -static int num_events; static int buffer_pages = 1; @@ -120,8 +119,9 @@ long bad_msg[MAX_THR]; long bad_restart[MAX_THR]; int fown; -int __thread myid; /* TLS */ +static int __thread myid; /* TLS */ static perf_event_desc_t __thread *fds; /* TLS */ +static int __thread num_fds; /* TLS */ pid_t gettid(void) @@ -277,8 +277,10 @@ overflow_start(char *name) size_t pgsz; int ret, fd, flags; - num_events = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES", &fds); - if (num_events != 1) + fds = NULL; + num_fds = 0; + ret = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES", &fds, &num_fds); + if (ret || !num_fds) errx(1, "cannot monitor event"); pgsz = sysconf(_SC_PAGESIZE); diff --git a/perf_examples/syst.c b/perf_examples/syst.c index 429c6c4..a681e41 100644 --- a/perf_examples/syst.c +++ b/perf_examples/syst.c @@ -43,22 +43,21 @@ typedef struct { static options_t options; static perf_event_desc_t **all_fds; -static int num; +static int *num_fds; void setup_cpu(int cpu) { - perf_event_desc_t *fds = NULL; - int i; + perf_event_desc_t *fds; + int i, ret; - num = perf_setup_list_events(options.events, &fds); - if (num == -1) + ret = perf_setup_list_events(options.events, &all_fds[cpu], &num_fds[cpu]); + if (ret || (num_fds == 0)) errx(1, "cannot setup events\n"); - - all_fds[cpu] = fds; + fds = all_fds[cpu]; /* temp */ fds[0].fd = -1; - for(i=0; i < num; i++) { + for(i=0; i < num_fds[cpu]; i++) { fds[i].hw.disabled = options.group ? !i : 1; if (options.excl && ((options.group && !i) || (!options.group))) @@ -92,10 +91,11 @@ measure(void) cmax = cmin + 1; ncpus = 1; } - all_fds = malloc(ncpus * sizeof(perf_event_desc_t)); - if (!all_fds) - err(1, "cannot allocate memory for all_fds"); + all_fds = calloc(ncpus, sizeof(perf_event_desc_t)); + num_fds = calloc(ncpus, sizeof(int)); + if (!all_fds || !num_fds) + err(1, "cannot allocate memory for internal structures"); for(c=cmin ; c < cmax; c++) setup_cpu(c); @@ -106,7 +106,7 @@ measure(void) fds = all_fds[c]; if (options.group) ret = ioctl(fds[0].fd, PERF_EVENT_IOC_ENABLE, 0); - else for(i=0; i < num; i++) { + else for(i=0; i < num_fds[c]; i++) { ret = ioctl(fds[i].fd, PERF_EVENT_IOC_ENABLE, 0); if (ret) err(1, "cannot enable event %s\n", fds[i].name); @@ -121,7 +121,7 @@ measure(void) puts("------------------------"); for(c = cmin; c < cmax; c++) { fds = all_fds[c]; - for(i=0; i < num; i++) { + for(i=0; i < num_fds[c]; i++) { double ratio; ret = read(fds[i].fd, values, sizeof(values)); @@ -152,7 +152,7 @@ measure(void) } for(c = cmin; c < cmax; c++) { fds = all_fds[c]; - for(i=0; i < num; i++) + for(i=0; i < num_fds[c]; i++) close(fds[i].fd); } free(all_fds); diff --git a/perf_examples/task.c b/perf_examples/task.c index b29822a..6c37b84 100644 --- a/perf_examples/task.c +++ b/perf_examples/task.c @@ -35,10 +35,13 @@ #include "perf_util.h" +#define MAX_GROUPS 16 + typedef struct { - const char *events; + const char *events[MAX_GROUPS]; + int num_groups; + int format_group; int inherit; - int group; int print; int pin; pid_t pid; @@ -59,11 +62,11 @@ child(char **arg) } static void -read_group(perf_event_desc_t *fds, int num) +read_groups(perf_event_desc_t *fds, int num) { - uint64_t *values; - size_t sz; - int i, ret; + uint64_t *values = NULL; + size_t new_sz, sz = 0; + int i, evt, ret; /* * { u64 nr; @@ -76,56 +79,55 @@ read_group(perf_event_desc_t *fds, int num) * * we do not use FORMAT_ID in this program */ - sz = sizeof(uint64_t) * (3 + num); - values = malloc(sz); - if (!values) - err(1, "cannot allocate memory for values\n"); - - ret = read(fds[0].fd, values, sz); - if (ret != sz) { /* unsigned */ - if (ret == -1) - err(1, "cannot read values event %s", fds[0].name); - else /* likely pinned and could not be loaded */ - warnx("could not read event0 ret=%d", ret); - } - /* - * propagate to save area - */ - for(i=0; i < num; i++) { - values[0] = values[3+i]; - /* - * scaling because we may be sharing the PMU and - * thus may be multiplexed - */ - fds[i].prev_value = fds[i].value; - fds[i].value = perf_scale(values); - fds[i].enabled = values[1]; - fds[i].running = values[2]; - } - free(values); -} + for (evt = 0; evt < num; ) { + int num_evts_to_read; -static void -read_single(perf_event_desc_t *fds, int num) -{ - uint64_t values[3]; - int i, ret; + if (options.format_group) { + num_evts_to_read = perf_get_group_nevents(fds, num, evt); + new_sz = sizeof(uint64_t) * (3 + num_evts_to_read); + } else { + num_evts_to_read = 1; + new_sz = sizeof(uint64_t) * 3; + } - for(i=0; i < num; i++) { + if (new_sz > sz) { + sz = new_sz; + values = realloc(values, sz); + } - ret = read(fds[i].fd, values, sizeof(values)); - if (ret != sizeof(values)) { /* unsigned */ + if (!values) + err(1, "cannot allocate memory for values\n"); + + ret = read(fds[evt].fd, values, sz); + if (ret != sz) { /* unsigned */ if (ret == -1) - err(1, "cannot read values event %s", fds[i].name); - else /* likely pinned and could not be loaded */ - warnx("could not read event%d", i); + err(1, "cannot read values event %s", fds[0].name); + + /* likely pinned and could not be loaded */ + warnx("could not read event %d, tried to read %d bytes, but got %d", + evt, (int)sz, ret); + } + + /* + * propagate to save area + */ + for (i = evt; i < (evt + num_evts_to_read); i++) { + if (options.format_group) + values[0] = values[3 + (i - evt)]; + /* + * scaling because we may be sharing the PMU and + * thus may be multiplexed + */ + fds[i].prev_value = fds[i].value; + fds[i].value = perf_scale(values); + fds[i].enabled = values[1]; + fds[i].running = values[2]; } - fds[i].prev_value = fds[i].value; - fds[i].value = perf_scale(values); - fds[i].enabled = values[1]; - fds[i].running = values[2]; + evt += num_evts_to_read; } + if (values) + free(values); } static void @@ -133,21 +135,21 @@ print_counts(perf_event_desc_t *fds, int num) { int i; - if (options.group) - read_group(fds, num); - else - read_single(fds, num); + read_groups(fds, num); for(i=0; i < num; i++) { double ratio; uint64_t val; val = fds[i].value - fds[i].prev_value; - ratio = 0.0; if (fds[i].enabled) ratio = 1.0 * fds[i].running / fds[i].enabled; + /* separate groups */ + if (i && fds[i].hw.enable_on_exec) + putchar('\n'); + if (ratio == 1.0) printf("%'20"PRIu64" %s (%'"PRIu64" : %'"PRIu64")\n", val, fds[i].name, fds[i].enabled, fds[i].running); else @@ -167,8 +169,8 @@ static void sig_handler(int n) int parent(char **arg) { - perf_event_desc_t *fds; - int status, ret, i, num; + perf_event_desc_t *fds = NULL; + int status, ret, i, num_fds = 0, grp, group_fd; int ready[2], go[2]; char buf; pid_t pid; @@ -176,9 +178,12 @@ parent(char **arg) if (pfm_initialize() != PFM_SUCCESS) errx(1, "libpfm initialization failed"); - num = perf_setup_list_events(options.events, &fds); - if (num < 1) - exit(1); + for (grp = 0; grp < options.num_groups; grp++) { + int ret; + ret = perf_setup_list_events(options.events[grp], &fds, &num_fds); + if (ret || !num_fds) + exit(1); + } pid = options.pid; if (!pid) { @@ -232,37 +237,43 @@ parent(char **arg) } fds[0].fd = -1; - for(i=0; i < num; i++) { + for(i=0; i < num_fds; i++) { + int is_group_leader; /* boolean */ + + is_group_leader = perf_is_group_leader(fds, i); + if (is_group_leader) { + /* this is the group leader */ + group_fd = -1; + } else { + group_fd = fds[fds[i].group_leader].fd; + } + /* * create leader disabled with enable_on-exec */ if (!options.pid) { - if (options.group) { - fds[i].hw.disabled = !i; - fds[i].hw.enable_on_exec = !i; - } else { - fds[i].hw.disabled = 1; - fds[i].hw.enable_on_exec = 1; - } + fds[i].hw.disabled = is_group_leader; + fds[i].hw.enable_on_exec = is_group_leader; } fds[i].hw.read_format = PERF_FORMAT_SCALE; /* request timing information necessary for scaling counts */ - if (!i && options.group) - fds[0].hw.read_format |= PERF_FORMAT_GROUP; + if (is_group_leader && options.format_group) + fds[i].hw.read_format |= PERF_FORMAT_GROUP; if (options.inherit) fds[i].hw.inherit = 1; - if (options.pin && ((options.group && i== 0) || (!options.group))) + if (options.pin && is_group_leader) fds[i].hw.pinned = 1; - - fds[i].fd = perf_event_open(&fds[i].hw, pid, -1, options.group ? fds[0].fd : -1, 0); + fds[i].fd = perf_event_open(&fds[i].hw, pid, -1, group_fd, 0); if (fds[i].fd == -1) { warn("cannot attach event%d %s", i, fds[i].name); goto error; } - } + } + ioctl(fds[0].fd, PERF_EVENT_IOC_DISABLE, 0); + if (!options.pid) close(go[1]); @@ -271,12 +282,12 @@ parent(char **arg) if (!options.pid) { while(waitpid(pid, &status, WNOHANG) == 0) { sleep(1); - print_counts(fds, num); + print_counts(fds, num_fds); } } else { while(quit == 0) { sleep(1); - print_counts(fds, num); + print_counts(fds, num_fds); } } } else { @@ -284,10 +295,10 @@ parent(char **arg) waitpid(pid, &status, 0); else pause(); - print_counts(fds, num); + print_counts(fds, num_fds); } - for(i=0; i < num; i++) + for(i=0; i < num_fds; i++) close(fds[i].fd); free(fds); @@ -305,11 +316,11 @@ usage(void) printf("usage: task [-h] [-i] [-g] [-p] [-P] [-t pid] [-e event1,event2,...] cmd\n" "-h\t\tget help\n" "-i\t\tinherit across fork\n" - "-g\t\tgroup events\n" + "-f\t\tuse PERF_FORMAT_GROUP for reading up counts (experimental, not working)\n" "-p\t\tprint counts every second\n" "-P\t\tpin events\n" "-t pid\tmeasure existing pid\n" - "-e ev,ev\tlist of events to measure\n" + "-e ev,ev\tgroup of events to measure (multiple -e switches are allowed)\n" ); } @@ -320,13 +331,18 @@ main(int argc, char **argv) setlocale(LC_ALL, ""); - while ((c=getopt(argc, argv,"he:igpPt:")) != -1) { + while ((c=getopt(argc, argv,"he:ifpPt:")) != -1) { switch(c) { case 'e': - options.events = optarg; + if (options.num_groups < MAX_GROUPS) { + options.events[options.num_groups++] = optarg; + } else { + errx(1, "you cannot specify more than %d groups.\n", + MAX_GROUPS); + } break; - case 'g': - options.group = 1; + case 'f': + options.format_group = 1; break; case 'p': options.print = 1; @@ -347,12 +363,13 @@ main(int argc, char **argv) errx(1, "unknown error"); } } - if (!options.events) - options.events = "PERF_COUNT_HW_CPU_CYCLES,PERF_COUNT_HW_INSTRUCTIONS"; - + if (options.num_groups == 0) { + options.events[0] = "PERF_COUNT_HW_CPU_CYCLES,PERF_COUNT_HW_INSTRUCTIONS"; + options.num_groups = 1; + } if (!argv[optind] && !options.pid) errx(1, "you must specify a command to execute or a thread to attach to\n"); - + signal(SIGINT, sig_handler); return parent(argv+optind); diff --git a/perf_examples/task_attach_timeout.c b/perf_examples/task_attach_timeout.c index bfcb60d..a3bffb0 100644 --- a/perf_examples/task_attach_timeout.c +++ b/perf_examples/task_attach_timeout.c @@ -95,19 +95,19 @@ print_counts(perf_event_desc_t *fds, int num, int do_delta) int measure(pid_t pid) { - perf_event_desc_t *fds; - int i, num; + perf_event_desc_t *fds = NULL; + int i, ret, num_fds = 0; char fn[32]; if (pfm_initialize() != PFM_SUCCESS) errx(1, "libpfm initialization failed\n"); - num = perf_setup_list_events(options.events, &fds); - if (num < 1) + ret = perf_setup_list_events(options.events, &fds, &num_fds); + if (ret || (num_fds == 0)) exit(1); fds[0].fd = -1; - for(i=0; i < num; i++) { + for(i=0; i < num_fds; i++) { fds[i].hw.disabled = 0; /* start immediately */ /* request timing information necessary for scaling counts */ @@ -130,15 +130,15 @@ measure(pid_t pid) sleep(1); options.delay--; if (options.print) - print_counts(fds, num, 1); + print_counts(fds, num_fds, 1); } if (options.delay) warn("thread %d terminated before timeout", pid); if (!options.print) - print_counts(fds, num, 0); + print_counts(fds, num_fds, 0); - for(i=0; i < num; i++) + for(i=0; i < num_fds; i++) close(fds[i].fd); free(fds); diff --git a/perf_examples/task_smpl.c b/perf_examples/task_smpl.c index 7d029db..2ceeb01 100644 --- a/perf_examples/task_smpl.c +++ b/perf_examples/task_smpl.c @@ -58,7 +58,7 @@ typedef struct { static jmp_buf jbuf; static uint64_t collected_samples, lost_samples; static perf_event_desc_t *fds; -static int num_events; +static int num_fds; static options_t options; static uint64_t sum_period; @@ -293,7 +293,7 @@ display_sample(perf_event_desc_t *hw, struct perf_event_header *ehdr) sz -= sizeof(grp); - e = perf_id2event(fds, num_events, grp.id); + e = perf_id2event(fds, num_fds, grp.id); if (e == -1) str = "unknown sample event"; else @@ -395,7 +395,7 @@ display_lost(perf_event_desc_t *hw) if (ret) errx(1, "cannot read lost info"); - e = perf_id2event(fds, num_events, lost.id); + e = perf_id2event(fds, num_fds, lost.id); if (e == -1) str = "unknown lost event"; else @@ -486,8 +486,8 @@ mainloop(char **arg) /* * does allocate fds */ - num_events = perf_setup_list_events(options.events, &fds); - if (num_events == -1) + ret = perf_setup_list_events(options.events, &fds, &num_fds); + if (ret || !num_fds) errx(1, "cannot setup event list"); memset(pollfds, 0, sizeof(pollfds)); @@ -512,7 +512,7 @@ mainloop(char **arg) errx(1, "task %s [%d] exited already status %d\n", arg[0], pid, WEXITSTATUS(status)); fds[0].fd = -1; - for(i=0; i < num_events; i++) { + for(i=0; i < num_fds; i++) { fds[i].hw.disabled = 0; /* start immediately */ @@ -538,7 +538,7 @@ mainloop(char **arg) /* must get event id for SAMPLE_GROUP */ fds[i].hw.read_format = PERF_FORMAT_SCALE; - if (num_events > 1) + if (num_fds > 1) fds[i].hw.read_format |= PERF_FORMAT_GROUP|PERF_FORMAT_ID; } @@ -573,7 +573,7 @@ mainloop(char **arg) * We are skipping the first 3 values (nr, time_enabled, time_running) * and then for each event we get a pair of values. */ - sz = (3+2*num_events)*sizeof(uint64_t); + sz = (3+2*num_fds)*sizeof(uint64_t); val = malloc(sz); if (!val) err(1, "cannot allocate memory"); @@ -583,7 +583,7 @@ mainloop(char **arg) err(1, "cannot read id %zu", sizeof(val)); - for(i=0; i < num_events; i++) { + for(i=0; i < num_fds; i++) { fds[i].id = val[2*i+1+3]; printf("%"PRIu64" %s\n", fds[i].id, fds[i].name); } @@ -617,7 +617,7 @@ terminate_session: */ wait4(pid, &status, 0, NULL); - for(i=0; i < num_events; i++) + for(i=0; i < num_fds; i++) close(fds[i].fd); /* check for partial event buffer */ diff --git a/perf_examples/x86/bts_smpl.c b/perf_examples/x86/bts_smpl.c index 46237bd..b28fd06 100644 --- a/perf_examples/x86/bts_smpl.c +++ b/perf_examples/x86/bts_smpl.c @@ -59,7 +59,7 @@ typedef struct { static jmp_buf jbuf; static uint64_t collected_samples, lost_samples; static perf_event_desc_t *fds; -static int num_events; +static int num_fds; static options_t options; static struct option the_options[]={ @@ -172,7 +172,7 @@ display_lost(perf_event_desc_t *hw) if (ret) errx(1, "cannot read lost info"); - e = perf_id2event(fds, num_events, lost.id); + e = perf_id2event(fds, num_fds, lost.id); if (e == -1) str = "unknown lost event"; else @@ -239,8 +239,8 @@ mainloop(char **arg) /* * does allocate fds */ - num_events = perf_setup_list_events("PERF_COUNT_HW_BRANCH_INSTRUCTIONS", &fds); - if (num_events == -1) + ret = perf_setup_list_events("PERF_COUNT_HW_BRANCH_INSTRUCTIONS", &fds, &num_fds); + if (ret || !num_fds) errx(1, "cannot setup event"); memset(pollfds, 0, sizeof(pollfds)); diff --git a/perf_examples/x86/pebs_smpl.c b/perf_examples/x86/pebs_smpl.c index 83ba098..f24aa44 100644 --- a/perf_examples/x86/pebs_smpl.c +++ b/perf_examples/x86/pebs_smpl.c @@ -54,7 +54,7 @@ typedef struct { static jmp_buf jbuf; static uint64_t collected_samples, lost_samples; static perf_event_desc_t *fds; -static int num_events; +static int num_fds; static options_t options; static uint64_t sum_period; @@ -353,7 +353,7 @@ display_sample(perf_event_desc_t *hw, struct perf_event_header *ehdr) sz -= sizeof(grp); - e = perf_id2event(fds, num_events, grp.id); + e = perf_id2event(fds, num_fds, grp.id); if (e == -1) str = "unknown sample event"; else @@ -419,7 +419,7 @@ display_lost(perf_event_desc_t *hw) if (ret) errx(1, "cannot read lost info"); - e = perf_id2event(fds, num_events, lost.id); + e = perf_id2event(fds, num_fds, lost.id); if (e == -1) str = "unknown lost event"; else @@ -510,8 +510,8 @@ mainloop(char **arg) /* * does allocate fds */ - num_events = perf_setup_list_events(options.events, &fds); - if (num_events == -1) + ret = perf_setup_list_events(options.events, &fds, &num_fds); + if (ret || !num_fds) errx(1, "cannot setup event list"); memset(pollfds, 0, sizeof(pollfds)); @@ -536,7 +536,7 @@ mainloop(char **arg) errx(1, "task %s [%d] exited already status %d\n", arg[0], pid, WEXITSTATUS(status)); fds[0].fd = -1; - for(i=0; i < num_events; i++) { + for(i=0; i < num_fds; i++) { fds[i].hw.disabled = 0; /* start immediately */ @@ -595,7 +595,7 @@ mainloop(char **arg) * We are skipping the first 3 values (nr, time_enabled, time_running) * and then for each event we get a pair of values. */ - sz = (3+2*num_events)*sizeof(uint64_t); + sz = (3+2*num_fds)*sizeof(uint64_t); val = malloc(sz); if (!val) err(1, "cannot allocated memory"); @@ -605,7 +605,7 @@ mainloop(char **arg) err(1, "cannot read id %zu", sizeof(val)); - for(i=0; i < num_events; i++) { + for(i=0; i < num_fds; i++) { fds[i].id = val[2*i+1+3]; printf("%"PRIu64" %s\n", fds[i].id, fds[i].name); } @@ -639,7 +639,7 @@ terminate_session: */ wait4(pid, &status, 0, NULL); - for(i=0; i < num_events; i++) + for(i=0; i < num_fds; i++) close(fds[i].fd); /* check for partial event buffer */ ------------------------------------------------------------------------------ _______________________________________________ perfmon2-devel mailing list perfmon2-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/perfmon2-devel