Corey,
I reworked your patch a little bit. I promoted certain helper functions
to perf_util.h.
I did not really like the extra max_fds parameter to the perf_setup_*()
functions. I thought it was too much to carry around. Instead, I stash this
value in the first entry of fds[] (fds[0].max_fds). I kept num_fds as a
parameter; this way the return value is purely an error code. I also updated
the patch to the latest HEAD.
I also updated the x86 specific examples.
Let me know what you think.
Thanks.
diff --git a/perf_examples/notify_group.c b/perf_examples/notify_group.c
index 9192df2..598be72 100644
--- a/perf_examples/notify_group.c
+++ b/perf_examples/notify_group.c
@@ -44,7 +44,7 @@ typedef struct {
static volatile unsigned long notification_received;
static perf_event_desc_t *fds;
-static int num_events;
+static int num_fds;
static int buffer_pages = 1; /* size of buffer payload (must be power of 2) */
@@ -56,7 +56,7 @@ sigio_handler(int n, struct siginfo *info, struct sigcontext
*sc)
uint64_t ip;
int id, ret;
- id = perf_fd2event(fds, num_events, info->si_fd);
+ id = perf_fd2event(fds, num_fds, info->si_fd);
if (id == -1)
errx(1, "cannot find event for descriptor %d", info->si_fd);
@@ -129,15 +129,15 @@ main(int argc, char **argv)
/*
* allocates fd for us
*/
- num_events = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES,"
+ ret = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES,"
"PERF_COUNT_HW_CPU_CYCLES,"
"PERF_COUNT_HW_CPU_CYCLES",
- &fds);
- if (num_events < 1)
+ &fds, &num_fds);
+ if (ret || !num_fds)
exit(1);
fds[0].fd = -1;
- for(i=0; i < num_events; i++) {
+ for(i=0; i < num_fds; i++) {
/* want a notification for each sample added to the buffer */
fds[i].hw.disabled = !!i;
printf("i=%d disabled=%d\n", i, fds[i].hw.disabled);
@@ -180,7 +180,7 @@ main(int argc, char **argv)
fds[i].pgmsk = (buffer_pages * pgsz) - 1;
}
- for(i=0; i < num_events; i++) {
+ for(i=0; i < num_fds; i++) {
ret = ioctl(fds[i].fd, PERF_EVENT_IOC_REFRESH , 1);
if (ret == -1)
err(1, "cannot refresh");
@@ -194,7 +194,7 @@ error:
/*
* destroy our session
*/
- for(i=0; i < num_events; i++)
+ for(i=0; i < num_fds; i++)
close(fds[i].fd);
free(fds);
diff --git a/perf_examples/notify_self.c b/perf_examples/notify_self.c
index 12dcfc9..ceee2cd 100644
--- a/perf_examples/notify_self.c
+++ b/perf_examples/notify_self.c
@@ -40,8 +40,8 @@
static volatile unsigned long notification_received;
-static perf_event_desc_t *fds;
-static int num_events;
+static perf_event_desc_t *fds = NULL;
+static int num_fds = 0;
static int buffer_pages = 1; /* size of buffer payload (must be power of 2)*/
@@ -79,7 +79,7 @@ print_sample(int id)
if (ret)
errx(1, "cannot read grp");
- e = perf_id2event(fds, num_events, grp.id);
+ e = perf_id2event(fds, num_fds, grp.id);
if (e == -1)
str = "unknown event";
else
@@ -119,7 +119,7 @@ sigio_handler(int n, struct siginfo *info, void *uc)
if (info->si_code != POLL_HUP)
errx(1, "signal not generated by SIGIO");
- id = perf_fd2event(fds, num_events, info->si_fd);
+ id = perf_fd2event(fds, num_fds, info->si_fd);
if (id == -1)
errx(1, "no event associated with fd=%d", info->si_fd);
@@ -188,14 +188,14 @@ main(int argc, char **argv)
/*
* allocates fd for us
*/
- num_events = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES,"
+ ret = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES,"
"PERF_COUNT_HW_INSTRUCTIONS",
- &fds);
- if (num_events < 1)
+ &fds, &num_fds);
+ if (ret || (num_fds == 0))
exit(1);
fds[0].fd = -1;
- for(i=0; i < num_events; i++) {
+ for(i=0; i < num_fds; i++) {
/* want a notification for every each added to the buffer */
fds[i].hw.disabled = !i;
@@ -213,7 +213,7 @@ main(int argc, char **argv)
err(1, "cannot attach event %s", fds[i].name);
}
- sz = (3+2*num_events)*sizeof(uint64_t);
+ sz = (3+2*num_fds)*sizeof(uint64_t);
val = malloc(sz);
if (!val)
err(1, "cannot allocated memory");
@@ -245,7 +245,7 @@ main(int argc, char **argv)
* We are skipping the first 3 values (nr, time_enabled, time_running)
* and then for each event we get a pair of values.
*/
- for(i=0; i < num_events; i++) {
+ for(i=0; i < num_fds; i++) {
fds[i].id = val[2*i+1+3];
printf("%"PRIu64" %s\n", fds[i].id, fds[i].name);
}
@@ -295,7 +295,7 @@ main(int argc, char **argv)
/*
* destroy our session
*/
- for(i=0; i < num_events; i++)
+ for(i=0; i < num_fds; i++)
close(fds[i].fd);
free(fds);
diff --git a/perf_examples/perf_util.c b/perf_examples/perf_util.c
index 4afc978..9c99b15 100644
--- a/perf_examples/perf_util.c
+++ b/perf_examples/perf_util.c
@@ -31,69 +31,86 @@
#include <perfmon/pfmlib_perf_event.h>
#include "perf_util.h"
+/* *fds must be a null pointer on the first call; later calls append to it
+ * *num_fds must be zero on the first call; capacity is kept in fds[0].max_fds
+ * The return value is success (0) vs. failure (non-zero)
+ */
int
-perf_setup_argv_events(const char **argv, perf_event_desc_t **fd)
+perf_setup_argv_events(const char **argv, perf_event_desc_t **fds, int
*num_fds)
{
- perf_event_desc_t *fdt = NULL;
- int num = 0, max_fd = 0, new_max;
- int ret;
+ perf_event_desc_t *fd;
+ int new_max, ret, num, max_fds;
+ int group_leader;
- if (!(argv && fd))
+ if (!(argv && fds && num_fds))
return -1;
-
- while(*argv) {
- if (num == max_fd) {
+ fd = *fds;
+ if (fd) {
+ max_fds = fd[0].max_fds;
+ if (max_fds < 2)
+ return -1;
+ num = *num_fds;
+ } else {
+ max_fds = num = 0; /* bootstrap */
+ }
+ group_leader = num;
- if (!max_fd)
+ while(*argv) {
+ if (num == max_fds) {
+ if (max_fds == 0)
new_max = 2;
else
- new_max = max_fd << 1;
+ new_max = max_fds << 1;
- if (new_max < max_fd) {
+ if (new_max < max_fds) {
warn("too many entries");
goto error;
}
- fdt = realloc(fdt, new_max * sizeof(*fdt));
- if (!fdt) {
+ fd = realloc(fd, new_max * sizeof(*fd));
+ if (!fd) {
warn("cannot allocate memory");
goto error;
}
/* reset newly allocated chunk */
- memset(fdt+max_fd, 0, (new_max - max_fd) *
sizeof(*fdt));
- max_fd = new_max;
+ memset(fd + max_fds, 0, (new_max - max_fds) *
sizeof(*fd));
+ max_fds = new_max;
+
+ /* update max size */
+ fd[0].max_fds = max_fds;
}
- ret = pfm_get_perf_event_encoding(*argv, PFM_PLM3,
&fdt[num].hw, NULL, NULL);
+ ret = pfm_get_perf_event_encoding(*argv, PFM_PLM3, &fd[num].hw,
NULL, NULL);
if (ret != PFM_SUCCESS) {
warnx("event %s: %s\n", *argv, pfm_strerror(ret));
goto error;
}
/* ABI compatibility */
- fdt[num].hw.size = sizeof(struct perf_event_attr);
- fdt[num].name = *argv;
+ fd[num].hw.size = sizeof(struct perf_event_attr);
+
+ fd[num].name = *argv;
+ fd[num].group_leader = group_leader;
num++;
argv++;
}
- *fd = fdt;
- return num;
+ *num_fds = num;
+ *fds = fd;
+ return 0;
error:
- free(fdt);
+ free(fd);
return -1;
}
int
-perf_setup_list_events(const char *ev, perf_event_desc_t **fd)
+perf_setup_list_events(const char *ev, perf_event_desc_t **fd, int *num_fds)
{
const char **argv;
- char *p, *q;
- char *events;
- int num = 0, i, ret;
+ char *p, *q, *events;
+ int i, ret, num = 0;
- if (!(ev && fd))
+ if (!(ev && fd && num_fds))
return -1;
-
events = strdup(ev);
if (!events)
return -1;
@@ -105,7 +122,7 @@ perf_setup_list_events(const char *ev, perf_event_desc_t
**fd)
}
num++;
num++; /* terminator */
-
+
argv = malloc(num * sizeof(char *));
if (!argv) {
free(events);
@@ -119,12 +136,35 @@ perf_setup_list_events(const char *ev, perf_event_desc_t
**fd)
}
argv[i++] = q;
argv[i] = NULL;
- ret = perf_setup_argv_events(argv, fd);
+ ret = perf_setup_argv_events(argv, fd, num_fds);
free(argv);
return ret;
}
int
+perf_get_group_nevents(perf_event_desc_t *fds, int num, int idx)
+{
+	/* Return the number of events in the group fds[idx] belongs to (a
+	 * contiguous run of entries sharing one group_leader index), or 0
+	 * when idx is outside [0, num).
+	 */
+	int leader, i;
+
+	if (idx < 0 || idx >= num)
+		return 0;
+
+	leader = fds[idx].group_leader;
+
+	/* advance to the first entry owned by a different group */
+	for (i = leader + 1; i < num; i++) {
+		if (fds[i].group_leader != leader)
+			break;
+	}
+	/* also correct for the last group, which runs to the end of fds[] */
+	return i - leader;
+}
+
+int
perf_read_buffer(struct perf_event_mmap_page *hdr, size_t pgmsk, void *buf,
size_t sz)
{
char *data;
diff --git a/perf_examples/perf_util.h b/perf_examples/perf_util.h
index 8f5222f..c65c003 100644
--- a/perf_examples/perf_util.h
+++ b/perf_examples/perf_util.h
@@ -36,14 +36,16 @@ typedef struct {
uint64_t id; /* event id kernel */
void *buf;
size_t pgmsk;
+ int group_leader;
int fd;
+ int max_fds;
} perf_event_desc_t;
/* handy shortcut */
#define PERF_FORMAT_SCALE
(PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING)
-extern int perf_setup_argv_events(const char **argv, perf_event_desc_t **fd);
-extern int perf_setup_list_events(const char *events, perf_event_desc_t **fd);
+extern int perf_setup_argv_events(const char **argv, perf_event_desc_t **fd,
int *num_fds);
+extern int perf_setup_list_events(const char *events, perf_event_desc_t **fd,
int *num_fds);
extern int perf_read_buffer(struct perf_event_mmap_page *hdr, size_t pgmsk,
void *buf, size_t sz);
extern void perf_skip_buffer(struct perf_event_mmap_page *hdr, size_t sz);
@@ -109,4 +111,13 @@ perf_id2event(perf_event_desc_t *fds, int num_events,
uint64_t id)
return j;
return -1;
}
+
+static inline int
+perf_is_group_leader(perf_event_desc_t *fds, int idx)
+{ /* nonzero when fds[idx] leads its group; idx is not bounds-checked here */
+ return fds[idx].group_leader == idx; /* a leader's group_leader is its own index */
+}
+
+extern int perf_get_group_nevents(perf_event_desc_t *fds, int num, int leader);
+
#endif
diff --git a/perf_examples/self.c b/perf_examples/self.c
index 12ebcd4..11e3d85 100644
--- a/perf_examples/self.c
+++ b/perf_examples/self.c
@@ -65,9 +65,9 @@ noploop(void)
int
main(int argc, char **argv)
{
- perf_event_desc_t *fds;
+ perf_event_desc_t *fds = NULL;
uint64_t values[3];
- int i, ret, num;
+ int i, ret, num_fds = 0;
setlocale(LC_ALL, "");
/*
@@ -77,12 +77,12 @@ main(int argc, char **argv)
if (ret != PFM_SUCCESS)
errx(1, "Cannot initialize library: %s", pfm_strerror(ret));
- num = perf_setup_argv_events(argc > 1 ? (const char **)argv+1 :
gen_events, &fds);
- if (num == -1)
+ ret = perf_setup_argv_events(argc > 1 ? (const char **)argv+1 :
gen_events, &fds, &num_fds);
+ if (ret || !num_fds)
errx(1, "cannot setup events");
fds[0].fd = -1;
- for(i=0; i < num; i++) {
+ for(i=0; i < num_fds; i++) {
/* request timing information necessary for scaling */
fds[i].hw.read_format = PERF_FORMAT_SCALE;
@@ -97,8 +97,8 @@ main(int argc, char **argv)
signal(SIGALRM, sig_handler);
/*
- * enable all counters attached to this thread and created by it
- */
+ * enable all counters attached to this thread and created by it
+ */
ret = prctl(PR_TASK_PERF_EVENTS_ENABLE);
if (ret)
err(1, "prctl(enable) failed");
@@ -108,8 +108,8 @@ main(int argc, char **argv)
noploop();
/*
- * disable all counters attached to this thread
- */
+ * disable all counters attached to this thread
+ */
ret = prctl(PR_TASK_PERF_EVENTS_DISABLE);
if (ret)
err(1, "prctl(disable) failed");
@@ -121,7 +121,7 @@ main(int argc, char **argv)
*/
memset(values, 0, sizeof(values));
- for (i=0; i < num; i++) {
+ for (i=0; i < num_fds; i++) {
uint64_t val;
double ratio;
diff --git a/perf_examples/self_count.c b/perf_examples/self_count.c
index 48855fd..c77456d 100644
--- a/perf_examples/self_count.c
+++ b/perf_examples/self_count.c
@@ -179,10 +179,10 @@ read_count(perf_event_desc_t *fds)
int
main(int argc, char **argv)
{
- perf_event_desc_t *fds;
+ perf_event_desc_t *fds = NULL;
size_t pgsz;
uint64_t val;
- int i, ret, num;
+ int i, ret, num_fds = 0;
int n = 30;
pgsz = sysconf(_SC_PAGESIZE);
@@ -193,12 +193,12 @@ main(int argc, char **argv)
if (ret != PFM_SUCCESS)
errx(1, "Cannot initialize library: %s", pfm_strerror(ret));
- num = perf_setup_argv_events(argc > 1 ? (const char **)(argv+1) :
gen_events, &fds);
- if (num == -1)
+ ret = perf_setup_argv_events(argc > 1 ? (const char **)argv+1 :
gen_events, &fds, &num_fds);
+ if (ret || !num_fds)
errx(1, "cannot setup events");
fds[0].fd = -1;
- for(i=0; i < num; i++) {
+ for(i=0; i < num_fds; i++) {
/* request timing information necesaary for scaling */
fds[i].hw.read_format = PERF_FORMAT_SCALE;
fds[i].hw.disabled = 0;
@@ -215,15 +215,15 @@ main(int argc, char **argv)
signal(SIGALRM, sig_handler);
/*
- * enable all counters attached to this thread
- */
+ * enable all counters attached to this thread
+ */
ioctl(fds[0].fd, PERF_EVENT_IOC_ENABLE, 0);
alarm(10);
for(;quit == 0;) {
-
- for (i=0; i < num; i++) {
+
+ for (i=0; i < num_fds; i++) {
val = read_count(&fds[i]);
printf("%20"PRIu64" %s\n", val, fds[i].name);
}
@@ -233,11 +233,11 @@ main(int argc, char **argv)
n = 30;
}
/*
- * disable all counters attached to this thread
- */
+ * disable all counters attached to this thread
+ */
ioctl(fds[0].fd, PERF_EVENT_IOC_DISABLE, 0);
- for (i=0; i < num; i++) {
+ for (i=0; i < num_fds; i++) {
munmap(fds[i].buf, pgsz);
close(fds[i].fd);
}
diff --git a/perf_examples/self_pipe.c b/perf_examples/self_pipe.c
index 8fb919a..90a90ff 100644
--- a/perf_examples/self_pipe.c
+++ b/perf_examples/self_pipe.c
@@ -85,8 +85,9 @@ static void
measure(void)
{
perf_event_desc_t *fds = NULL;
+ int num_fds = 0;
uint64_t values[3];
- int i, ret, num;
+ int i, ret;
int pr[2], pw[2];
ssize_t nbytes;
pid_t pid;
@@ -109,11 +110,11 @@ measure(void)
if (ret)
err(1, "cannot create write pipe");
- num = perf_setup_list_events(options.events, &fds);
- if (num < 1)
+ ret = perf_setup_list_events(options.events, &fds, &num_fds);
+ if (ret || !num_fds)
exit(1);
- for(i=0; i < num; i++) {
+ for(i=0; i < num_fds; i++) {
fds[i].hw.disabled = 1;
fds[i].hw.read_format = PERF_FORMAT_SCALE;
@@ -141,7 +142,7 @@ measure(void)
err(1, "cannot create child\n");
case 0:
/* do not inherit session fd */
- for(i=0; i < num; i++)
+ for(i=0; i < num_fds; i++)
close(fds[i].fd);
/* pr[]: write master, read child */
/* pw[]: read master, write child */
@@ -170,7 +171,7 @@ measure(void)
prctl(PR_TASK_PERF_EVENTS_DISABLE);
- for(i=0; i < num; i++) {
+ for(i=0; i < num_fds; i++) {
uint64_t val;
double ratio;
@@ -204,7 +205,7 @@ measure(void)
/*
* and destroy our session
*/
- for(i=0; i < num; i++)
+ for(i=0; i < num_fds; i++)
close(fds[i].fd);
free(fds);
diff --git a/perf_examples/self_smpl_multi.c b/perf_examples/self_smpl_multi.c
index 30d6acb..0ca45f5 100644
--- a/perf_examples/self_smpl_multi.c
+++ b/perf_examples/self_smpl_multi.c
@@ -77,7 +77,6 @@ static int program_time = PROGRAM_TIME;
static int threshold = THRESHOLD;
static int signum = SIGIO;
static pthread_barrier_t barrier;
-static int num_events;
static int buffer_pages = 1;
@@ -120,8 +119,9 @@ long bad_msg[MAX_THR];
long bad_restart[MAX_THR];
int fown;
-int __thread myid; /* TLS */
+static int __thread myid; /* TLS */
static perf_event_desc_t __thread *fds; /* TLS */
+static int __thread num_fds; /* TLS */
pid_t
gettid(void)
@@ -277,8 +277,10 @@ overflow_start(char *name)
size_t pgsz;
int ret, fd, flags;
- num_events = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES", &fds);
- if (num_events != 1)
+ fds = NULL;
+ num_fds = 0;
+ ret = perf_setup_list_events("PERF_COUNT_HW_CPU_CYCLES", &fds,
&num_fds);
+ if (ret || !num_fds)
errx(1, "cannot monitor event");
pgsz = sysconf(_SC_PAGESIZE);
diff --git a/perf_examples/syst.c b/perf_examples/syst.c
index 429c6c4..a681e41 100644
--- a/perf_examples/syst.c
+++ b/perf_examples/syst.c
@@ -43,22 +43,21 @@ typedef struct {
static options_t options;
static perf_event_desc_t **all_fds;
-static int num;
+static int *num_fds;
void
setup_cpu(int cpu)
{
- perf_event_desc_t *fds = NULL;
- int i;
+ perf_event_desc_t *fds;
+ int i, ret;
- num = perf_setup_list_events(options.events, &fds);
- if (num == -1)
+ ret = perf_setup_list_events(options.events, &all_fds[cpu],
&num_fds[cpu]);
+ if (ret || (num_fds == 0))
errx(1, "cannot setup events\n");
-
- all_fds[cpu] = fds;
+ fds = all_fds[cpu]; /* temp */
fds[0].fd = -1;
- for(i=0; i < num; i++) {
+ for(i=0; i < num_fds[cpu]; i++) {
fds[i].hw.disabled = options.group ? !i : 1;
if (options.excl && ((options.group && !i) || (!options.group)))
@@ -92,10 +91,11 @@ measure(void)
cmax = cmin + 1;
ncpus = 1;
}
- all_fds = malloc(ncpus * sizeof(perf_event_desc_t));
- if (!all_fds)
- err(1, "cannot allocate memory for all_fds");
+ all_fds = calloc(ncpus, sizeof(perf_event_desc_t));
+ num_fds = calloc(ncpus, sizeof(int));
+ if (!all_fds || !num_fds)
+ err(1, "cannot allocate memory for internal structures");
for(c=cmin ; c < cmax; c++)
setup_cpu(c);
@@ -106,7 +106,7 @@ measure(void)
fds = all_fds[c];
if (options.group)
ret = ioctl(fds[0].fd, PERF_EVENT_IOC_ENABLE, 0);
- else for(i=0; i < num; i++) {
+ else for(i=0; i < num_fds[c]; i++) {
ret = ioctl(fds[i].fd, PERF_EVENT_IOC_ENABLE, 0);
if (ret)
err(1, "cannot enable event %s\n", fds[i].name);
@@ -121,7 +121,7 @@ measure(void)
puts("------------------------");
for(c = cmin; c < cmax; c++) {
fds = all_fds[c];
- for(i=0; i < num; i++) {
+ for(i=0; i < num_fds[c]; i++) {
double ratio;
ret = read(fds[i].fd, values, sizeof(values));
@@ -152,7 +152,7 @@ measure(void)
}
for(c = cmin; c < cmax; c++) {
fds = all_fds[c];
- for(i=0; i < num; i++)
+ for(i=0; i < num_fds[c]; i++)
close(fds[i].fd);
}
free(all_fds);
diff --git a/perf_examples/task.c b/perf_examples/task.c
index b29822a..6c37b84 100644
--- a/perf_examples/task.c
+++ b/perf_examples/task.c
@@ -35,10 +35,13 @@
#include "perf_util.h"
+#define MAX_GROUPS 16
+
typedef struct {
- const char *events;
+ const char *events[MAX_GROUPS];
+ int num_groups;
+ int format_group;
int inherit;
- int group;
int print;
int pin;
pid_t pid;
@@ -59,11 +62,11 @@ child(char **arg)
}
static void
-read_group(perf_event_desc_t *fds, int num)
+read_groups(perf_event_desc_t *fds, int num)
{
- uint64_t *values;
- size_t sz;
- int i, ret;
+ uint64_t *values = NULL;
+ size_t new_sz, sz = 0;
+ int i, evt, ret;
/*
* { u64 nr;
@@ -76,56 +79,55 @@ read_group(perf_event_desc_t *fds, int num)
*
* we do not use FORMAT_ID in this program
*/
- sz = sizeof(uint64_t) * (3 + num);
- values = malloc(sz);
- if (!values)
- err(1, "cannot allocate memory for values\n");
-
- ret = read(fds[0].fd, values, sz);
- if (ret != sz) { /* unsigned */
- if (ret == -1)
- err(1, "cannot read values event %s", fds[0].name);
- else /* likely pinned and could not be loaded */
- warnx("could not read event0 ret=%d", ret);
- }
- /*
- * propagate to save area
- */
- for(i=0; i < num; i++) {
- values[0] = values[3+i];
- /*
- * scaling because we may be sharing the PMU and
- * thus may be multiplexed
- */
- fds[i].prev_value = fds[i].value;
- fds[i].value = perf_scale(values);
- fds[i].enabled = values[1];
- fds[i].running = values[2];
- }
- free(values);
-}
+ for (evt = 0; evt < num; ) {
+ int num_evts_to_read;
-static void
-read_single(perf_event_desc_t *fds, int num)
-{
- uint64_t values[3];
- int i, ret;
+ if (options.format_group) {
+ num_evts_to_read = perf_get_group_nevents(fds, num,
evt);
+ new_sz = sizeof(uint64_t) * (3 + num_evts_to_read);
+ } else {
+ num_evts_to_read = 1;
+ new_sz = sizeof(uint64_t) * 3;
+ }
- for(i=0; i < num; i++) {
+ if (new_sz > sz) {
+ sz = new_sz;
+ values = realloc(values, sz);
+ }
- ret = read(fds[i].fd, values, sizeof(values));
- if (ret != sizeof(values)) { /* unsigned */
+ if (!values)
+ err(1, "cannot allocate memory for values\n");
+
+ ret = read(fds[evt].fd, values, sz);
+ if (ret != sz) { /* unsigned */
if (ret == -1)
- err(1, "cannot read values event %s",
fds[i].name);
- else /* likely pinned and could not be loaded */
- warnx("could not read event%d", i);
+ err(1, "cannot read values event %s",
fds[0].name);
+
+ /* likely pinned and could not be loaded */
+ warnx("could not read event %d, tried to read %d bytes,
but got %d",
+ evt, (int)sz, ret);
+ }
+
+ /*
+ * propagate to save area
+ */
+ for (i = evt; i < (evt + num_evts_to_read); i++) {
+ if (options.format_group)
+ values[0] = values[3 + (i - evt)];
+ /*
+ * scaling because we may be sharing the PMU and
+ * thus may be multiplexed
+ */
+ fds[i].prev_value = fds[i].value;
+ fds[i].value = perf_scale(values);
+ fds[i].enabled = values[1];
+ fds[i].running = values[2];
}
- fds[i].prev_value = fds[i].value;
- fds[i].value = perf_scale(values);
- fds[i].enabled = values[1];
- fds[i].running = values[2];
+ evt += num_evts_to_read;
}
+ if (values)
+ free(values);
}
static void
@@ -133,21 +135,21 @@ print_counts(perf_event_desc_t *fds, int num)
{
int i;
- if (options.group)
- read_group(fds, num);
- else
- read_single(fds, num);
+ read_groups(fds, num);
for(i=0; i < num; i++) {
double ratio;
uint64_t val;
val = fds[i].value - fds[i].prev_value;
-
ratio = 0.0;
if (fds[i].enabled)
ratio = 1.0 * fds[i].running / fds[i].enabled;
+ /* separate groups */
+ if (i && fds[i].hw.enable_on_exec)
+ putchar('\n');
+
if (ratio == 1.0)
printf("%'20"PRIu64" %s (%'"PRIu64" : %'"PRIu64")\n",
val, fds[i].name, fds[i].enabled, fds[i].running);
else
@@ -167,8 +169,8 @@ static void sig_handler(int n)
int
parent(char **arg)
{
- perf_event_desc_t *fds;
- int status, ret, i, num;
+ perf_event_desc_t *fds = NULL;
+ int status, ret, i, num_fds = 0, grp, group_fd;
int ready[2], go[2];
char buf;
pid_t pid;
@@ -176,9 +178,12 @@ parent(char **arg)
if (pfm_initialize() != PFM_SUCCESS)
errx(1, "libpfm initialization failed");
- num = perf_setup_list_events(options.events, &fds);
- if (num < 1)
- exit(1);
+ for (grp = 0; grp < options.num_groups; grp++) {
+ int ret;
+ ret = perf_setup_list_events(options.events[grp], &fds,
&num_fds);
+ if (ret || !num_fds)
+ exit(1);
+ }
pid = options.pid;
if (!pid) {
@@ -232,37 +237,43 @@ parent(char **arg)
}
fds[0].fd = -1;
- for(i=0; i < num; i++) {
+ for(i=0; i < num_fds; i++) {
+ int is_group_leader; /* boolean */
+
+ is_group_leader = perf_is_group_leader(fds, i);
+ if (is_group_leader) {
+ /* this is the group leader */
+ group_fd = -1;
+ } else {
+ group_fd = fds[fds[i].group_leader].fd;
+ }
+
/*
* create leader disabled with enable_on-exec
*/
if (!options.pid) {
- if (options.group) {
- fds[i].hw.disabled = !i;
- fds[i].hw.enable_on_exec = !i;
- } else {
- fds[i].hw.disabled = 1;
- fds[i].hw.enable_on_exec = 1;
- }
+ fds[i].hw.disabled = is_group_leader;
+ fds[i].hw.enable_on_exec = is_group_leader;
}
fds[i].hw.read_format = PERF_FORMAT_SCALE;
/* request timing information necessary for scaling counts */
- if (!i && options.group)
- fds[0].hw.read_format |= PERF_FORMAT_GROUP;
+ if (is_group_leader && options.format_group)
+ fds[i].hw.read_format |= PERF_FORMAT_GROUP;
if (options.inherit)
fds[i].hw.inherit = 1;
- if (options.pin && ((options.group && i== 0) ||
(!options.group)))
+ if (options.pin && is_group_leader)
fds[i].hw.pinned = 1;
-
- fds[i].fd = perf_event_open(&fds[i].hw, pid, -1, options.group
? fds[0].fd : -1, 0);
+ fds[i].fd = perf_event_open(&fds[i].hw, pid, -1, group_fd, 0);
if (fds[i].fd == -1) {
warn("cannot attach event%d %s", i, fds[i].name);
goto error;
}
- }
+ }
+ ioctl(fds[0].fd, PERF_EVENT_IOC_DISABLE, 0);
+
if (!options.pid)
close(go[1]);
@@ -271,12 +282,12 @@ parent(char **arg)
if (!options.pid) {
while(waitpid(pid, &status, WNOHANG) == 0) {
sleep(1);
- print_counts(fds, num);
+ print_counts(fds, num_fds);
}
} else {
while(quit == 0) {
sleep(1);
- print_counts(fds, num);
+ print_counts(fds, num_fds);
}
}
} else {
@@ -284,10 +295,10 @@ parent(char **arg)
waitpid(pid, &status, 0);
else
pause();
- print_counts(fds, num);
+ print_counts(fds, num_fds);
}
- for(i=0; i < num; i++)
+ for(i=0; i < num_fds; i++)
close(fds[i].fd);
free(fds);
@@ -305,11 +316,11 @@ usage(void)
printf("usage: task [-h] [-i] [-g] [-p] [-P] [-t pid] [-e
event1,event2,...] cmd\n"
"-h\t\tget help\n"
"-i\t\tinherit across fork\n"
- "-g\t\tgroup events\n"
+ "-f\t\tuse PERF_FORMAT_GROUP for reading up counts
(experimental, not working)\n"
"-p\t\tprint counts every second\n"
"-P\t\tpin events\n"
"-t pid\tmeasure existing pid\n"
- "-e ev,ev\tlist of events to measure\n"
+ "-e ev,ev\tgroup of events to measure (multiple -e switches are
allowed)\n"
);
}
@@ -320,13 +331,18 @@ main(int argc, char **argv)
setlocale(LC_ALL, "");
- while ((c=getopt(argc, argv,"he:igpPt:")) != -1) {
+ while ((c=getopt(argc, argv,"he:ifpPt:")) != -1) {
switch(c) {
case 'e':
- options.events = optarg;
+ if (options.num_groups < MAX_GROUPS) {
+ options.events[options.num_groups++] =
optarg;
+ } else {
+ errx(1, "you cannot specify more than
%d groups.\n",
+ MAX_GROUPS);
+ }
break;
- case 'g':
- options.group = 1;
+ case 'f':
+ options.format_group = 1;
break;
case 'p':
options.print = 1;
@@ -347,12 +363,13 @@ main(int argc, char **argv)
errx(1, "unknown error");
}
}
- if (!options.events)
- options.events =
"PERF_COUNT_HW_CPU_CYCLES,PERF_COUNT_HW_INSTRUCTIONS";
-
+ if (options.num_groups == 0) {
+ options.events[0] =
"PERF_COUNT_HW_CPU_CYCLES,PERF_COUNT_HW_INSTRUCTIONS";
+ options.num_groups = 1;
+ }
if (!argv[optind] && !options.pid)
errx(1, "you must specify a command to execute or a thread to
attach to\n");
-
+
signal(SIGINT, sig_handler);
return parent(argv+optind);
diff --git a/perf_examples/task_attach_timeout.c
b/perf_examples/task_attach_timeout.c
index bfcb60d..a3bffb0 100644
--- a/perf_examples/task_attach_timeout.c
+++ b/perf_examples/task_attach_timeout.c
@@ -95,19 +95,19 @@ print_counts(perf_event_desc_t *fds, int num, int do_delta)
int
measure(pid_t pid)
{
- perf_event_desc_t *fds;
- int i, num;
+ perf_event_desc_t *fds = NULL;
+ int i, ret, num_fds = 0;
char fn[32];
if (pfm_initialize() != PFM_SUCCESS)
errx(1, "libpfm initialization failed\n");
- num = perf_setup_list_events(options.events, &fds);
- if (num < 1)
+ ret = perf_setup_list_events(options.events, &fds, &num_fds);
+ if (ret || (num_fds == 0))
exit(1);
fds[0].fd = -1;
- for(i=0; i < num; i++) {
+ for(i=0; i < num_fds; i++) {
fds[i].hw.disabled = 0; /* start immediately */
/* request timing information necessary for scaling counts */
@@ -130,15 +130,15 @@ measure(pid_t pid)
sleep(1);
options.delay--;
if (options.print)
- print_counts(fds, num, 1);
+ print_counts(fds, num_fds, 1);
}
if (options.delay)
warn("thread %d terminated before timeout", pid);
if (!options.print)
- print_counts(fds, num, 0);
+ print_counts(fds, num_fds, 0);
- for(i=0; i < num; i++)
+ for(i=0; i < num_fds; i++)
close(fds[i].fd);
free(fds);
diff --git a/perf_examples/task_smpl.c b/perf_examples/task_smpl.c
index 7d029db..2ceeb01 100644
--- a/perf_examples/task_smpl.c
+++ b/perf_examples/task_smpl.c
@@ -58,7 +58,7 @@ typedef struct {
static jmp_buf jbuf;
static uint64_t collected_samples, lost_samples;
static perf_event_desc_t *fds;
-static int num_events;
+static int num_fds;
static options_t options;
static uint64_t sum_period;
@@ -293,7 +293,7 @@ display_sample(perf_event_desc_t *hw, struct
perf_event_header *ehdr)
sz -= sizeof(grp);
- e = perf_id2event(fds, num_events, grp.id);
+ e = perf_id2event(fds, num_fds, grp.id);
if (e == -1)
str = "unknown sample event";
else
@@ -395,7 +395,7 @@ display_lost(perf_event_desc_t *hw)
if (ret)
errx(1, "cannot read lost info");
- e = perf_id2event(fds, num_events, lost.id);
+ e = perf_id2event(fds, num_fds, lost.id);
if (e == -1)
str = "unknown lost event";
else
@@ -486,8 +486,8 @@ mainloop(char **arg)
/*
* does allocate fds
*/
- num_events = perf_setup_list_events(options.events, &fds);
- if (num_events == -1)
+ ret = perf_setup_list_events(options.events, &fds, &num_fds);
+ if (ret || !num_fds)
errx(1, "cannot setup event list");
memset(pollfds, 0, sizeof(pollfds));
@@ -512,7 +512,7 @@ mainloop(char **arg)
errx(1, "task %s [%d] exited already status %d\n", arg[0], pid,
WEXITSTATUS(status));
fds[0].fd = -1;
- for(i=0; i < num_events; i++) {
+ for(i=0; i < num_fds; i++) {
fds[i].hw.disabled = 0; /* start immediately */
@@ -538,7 +538,7 @@ mainloop(char **arg)
/* must get event id for SAMPLE_GROUP */
fds[i].hw.read_format = PERF_FORMAT_SCALE;
- if (num_events > 1)
+ if (num_fds > 1)
fds[i].hw.read_format |=
PERF_FORMAT_GROUP|PERF_FORMAT_ID;
}
@@ -573,7 +573,7 @@ mainloop(char **arg)
* We are skipping the first 3 values (nr, time_enabled, time_running)
* and then for each event we get a pair of values.
*/
- sz = (3+2*num_events)*sizeof(uint64_t);
+ sz = (3+2*num_fds)*sizeof(uint64_t);
val = malloc(sz);
if (!val)
err(1, "cannot allocate memory");
@@ -583,7 +583,7 @@ mainloop(char **arg)
err(1, "cannot read id %zu", sizeof(val));
- for(i=0; i < num_events; i++) {
+ for(i=0; i < num_fds; i++) {
fds[i].id = val[2*i+1+3];
printf("%"PRIu64" %s\n", fds[i].id, fds[i].name);
}
@@ -617,7 +617,7 @@ terminate_session:
*/
wait4(pid, &status, 0, NULL);
- for(i=0; i < num_events; i++)
+ for(i=0; i < num_fds; i++)
close(fds[i].fd);
/* check for partial event buffer */
diff --git a/perf_examples/x86/bts_smpl.c b/perf_examples/x86/bts_smpl.c
index 46237bd..b28fd06 100644
--- a/perf_examples/x86/bts_smpl.c
+++ b/perf_examples/x86/bts_smpl.c
@@ -59,7 +59,7 @@ typedef struct {
static jmp_buf jbuf;
static uint64_t collected_samples, lost_samples;
static perf_event_desc_t *fds;
-static int num_events;
+static int num_fds;
static options_t options;
static struct option the_options[]={
@@ -172,7 +172,7 @@ display_lost(perf_event_desc_t *hw)
if (ret)
errx(1, "cannot read lost info");
- e = perf_id2event(fds, num_events, lost.id);
+ e = perf_id2event(fds, num_fds, lost.id);
if (e == -1)
str = "unknown lost event";
else
@@ -239,8 +239,8 @@ mainloop(char **arg)
/*
* does allocate fds
*/
- num_events =
perf_setup_list_events("PERF_COUNT_HW_BRANCH_INSTRUCTIONS", &fds);
- if (num_events == -1)
+ ret = perf_setup_list_events("PERF_COUNT_HW_BRANCH_INSTRUCTIONS", &fds,
&num_fds);
+ if (ret || !num_fds)
errx(1, "cannot setup event");
memset(pollfds, 0, sizeof(pollfds));
diff --git a/perf_examples/x86/pebs_smpl.c b/perf_examples/x86/pebs_smpl.c
index 83ba098..f24aa44 100644
--- a/perf_examples/x86/pebs_smpl.c
+++ b/perf_examples/x86/pebs_smpl.c
@@ -54,7 +54,7 @@ typedef struct {
static jmp_buf jbuf;
static uint64_t collected_samples, lost_samples;
static perf_event_desc_t *fds;
-static int num_events;
+static int num_fds;
static options_t options;
static uint64_t sum_period;
@@ -353,7 +353,7 @@ display_sample(perf_event_desc_t *hw, struct
perf_event_header *ehdr)
sz -= sizeof(grp);
- e = perf_id2event(fds, num_events, grp.id);
+ e = perf_id2event(fds, num_fds, grp.id);
if (e == -1)
str = "unknown sample event";
else
@@ -419,7 +419,7 @@ display_lost(perf_event_desc_t *hw)
if (ret)
errx(1, "cannot read lost info");
- e = perf_id2event(fds, num_events, lost.id);
+ e = perf_id2event(fds, num_fds, lost.id);
if (e == -1)
str = "unknown lost event";
else
@@ -510,8 +510,8 @@ mainloop(char **arg)
/*
* does allocate fds
*/
- num_events = perf_setup_list_events(options.events, &fds);
- if (num_events == -1)
+ ret = perf_setup_list_events(options.events, &fds, &num_fds);
+ if (ret || !num_fds)
errx(1, "cannot setup event list");
memset(pollfds, 0, sizeof(pollfds));
@@ -536,7 +536,7 @@ mainloop(char **arg)
errx(1, "task %s [%d] exited already status %d\n", arg[0], pid,
WEXITSTATUS(status));
fds[0].fd = -1;
- for(i=0; i < num_events; i++) {
+ for(i=0; i < num_fds; i++) {
fds[i].hw.disabled = 0; /* start immediately */
@@ -595,7 +595,7 @@ mainloop(char **arg)
* We are skipping the first 3 values (nr, time_enabled, time_running)
* and then for each event we get a pair of values.
*/
- sz = (3+2*num_events)*sizeof(uint64_t);
+ sz = (3+2*num_fds)*sizeof(uint64_t);
val = malloc(sz);
if (!val)
err(1, "cannot allocated memory");
@@ -605,7 +605,7 @@ mainloop(char **arg)
err(1, "cannot read id %zu", sizeof(val));
- for(i=0; i < num_events; i++) {
+ for(i=0; i < num_fds; i++) {
fds[i].id = val[2*i+1+3];
printf("%"PRIu64" %s\n", fds[i].id, fds[i].name);
}
@@ -639,7 +639,7 @@ terminate_session:
*/
wait4(pid, &status, 0, NULL);
- for(i=0; i < num_events; i++)
+ for(i=0; i < num_fds; i++)
close(fds[i].fd);
/* check for partial event buffer */
------------------------------------------------------------------------------
_______________________________________________
perfmon2-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/perfmon2-devel