Module Name:    src
Committed By:   maxv
Date:           Wed Jul 12 17:33:29 UTC 2017

Modified Files:
        src/sys/arch/x86/include: sysarch.h
        src/sys/arch/x86/x86: pmc.c
        src/usr.bin/pmc: pmc.c

Log Message:
Properly handle overflows, and take them into account in userland.

To generate a diff of this commit:
cvs rdiff -u -r1.11 -r1.12 src/sys/arch/x86/include/sysarch.h
cvs rdiff -u -r1.9 -r1.10 src/sys/arch/x86/x86/pmc.c
cvs rdiff -u -r1.24 -r1.25 src/usr.bin/pmc/pmc.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files: Index: src/sys/arch/x86/include/sysarch.h diff -u src/sys/arch/x86/include/sysarch.h:1.11 src/sys/arch/x86/include/sysarch.h:1.12 --- src/sys/arch/x86/include/sysarch.h:1.11 Fri Mar 10 13:09:11 2017 +++ src/sys/arch/x86/include/sysarch.h Wed Jul 12 17:33:29 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: sysarch.h,v 1.11 2017/03/10 13:09:11 maxv Exp $ */ +/* $NetBSD: sysarch.h,v 1.12 2017/07/12 17:33:29 maxv Exp $ */ /*- * Copyright (c) 2007 The NetBSD Foundation, Inc. @@ -134,6 +134,7 @@ struct _X86_SYSARCH_L(pmc_info_args) { int vers; int type; uint32_t nctrs; + uint64_t nsamp; }; #define PMC_VERSION 1 Index: src/sys/arch/x86/x86/pmc.c diff -u src/sys/arch/x86/x86/pmc.c:1.9 src/sys/arch/x86/x86/pmc.c:1.10 --- src/sys/arch/x86/x86/pmc.c:1.9 Wed Jul 12 16:59:41 2017 +++ src/sys/arch/x86/x86/pmc.c Wed Jul 12 17:33:29 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: pmc.c,v 1.9 2017/07/12 16:59:41 maxv Exp $ */ +/* $NetBSD: pmc.c,v 1.10 2017/07/12 17:33:29 maxv Exp $ */ /* * Copyright (c) 2017 The NetBSD Foundation, Inc. @@ -63,11 +63,17 @@ */ /* - * Interface to x86 CPU Performance Counters. + * Interface to x86 CPU Performance Counters. System-wide only, for now. + * + * For each PMC on each CPU, two pieces of information are returned to userland: + * the number of overflows, and the current value of the PMC. 
It means that the + * total number of events for the given PMC on the given CPU is computable the + * following way: + * tot_n_events = NEVENTS_SAMPLE * overfl + ctrval */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: pmc.c,v 1.9 2017/07/12 16:59:41 maxv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: pmc.c,v 1.10 2017/07/12 17:33:29 maxv Exp $"); #include "opt_pmc.h" @@ -83,7 +89,6 @@ __KERNEL_RCSID(0, "$NetBSD: pmc.c,v 1.9 #include <machine/specialreg.h> #include <machine/sysarch.h> #include <machine/pmc.h> -#include <machine/cpu_counter.h> #include <machine/cputypes.h> #include <machine/i82489reg.h> #include <machine/i82489var.h> @@ -92,8 +97,12 @@ __KERNEL_RCSID(0, "$NetBSD: pmc.c,v 1.9 #define NEVENTS_SAMPLE 500000 +/* + * Structure describing a PMC. + */ typedef struct { bool running; + size_t n; /* pmc number */ uint32_t evtmsr; /* event selector MSR */ uint64_t evtval; /* event selector value */ uint32_t ctrmsr; /* counter MSR */ @@ -102,21 +111,36 @@ typedef struct { uint64_t ctrmask; } pmc_state_t; -static nmi_handler_t *pmc_nmi_handle; -static uint32_t pmc_lapic_image[MAXCPUS]; +/* + * Per-CPU structure that describes the values of each PMC, plus the state + * of the LAPIC before enabling PMCs. + */ +typedef struct { + x86_pmc_cpuval_t val[PMC_NCOUNTERS]; /* values returned to user */ + uint64_t nmioverfl[PMC_NCOUNTERS]; /* incremented by NMI intr */ + uint32_t lapic_image; /* saved content of LAPIC */ +} pmc_cpu_t; -static x86_pmc_cpuval_t pmc_val_cpus[MAXCPUS] __aligned(CACHE_LINE_SIZE); +static pmc_state_t pmc_state[PMC_NCOUNTERS]; +static pmc_cpu_t pmc_cpu[MAXCPUS]; + +static nmi_handler_t *pmc_nmi_handle; static kmutex_t pmc_lock; -static pmc_state_t pmc_state[PMC_NCOUNTERS]; static uint32_t pmc_ncounters __read_mostly; static int pmc_type __read_mostly; +/* + * Handle PMC overflows. Called from NMI interrupt context, with interrupts + * disabled. 
+ */ static int pmc_nmi(const struct trapframe *tf, void *dummy) { struct cpu_info *ci = curcpu(); pmc_state_t *pmc; + pmc_cpu_t *cpu; + uint64_t ctr; size_t i; if (pmc_type == PMC_TYPE_NONE) { @@ -127,7 +151,11 @@ pmc_nmi(const struct trapframe *tf, void if (!pmc->running) { continue; } - /* XXX make sure it really comes from this PMC */ + ctr = rdmsr(pmc->ctrmsr); + /* If the highest bit is zero, then it's this PMC */ + if ((ctr & ((pmc->ctrmask + 1) >> 1)) != 0) { + continue; + } break; } if (i == pmc_ncounters) { @@ -135,7 +163,8 @@ pmc_nmi(const struct trapframe *tf, void } /* Count the overflow, and restart the counter */ - pmc_val_cpus[cpu_index(ci)].overfl++; + cpu = &pmc_cpu[cpu_index(ci)]; + cpu->nmioverfl[i]++; wrmsr(pmc->ctrmsr, pmc->ctrinitval); return 1; @@ -146,9 +175,37 @@ pmc_read_cpu(void *arg1, void *arg2) { pmc_state_t *pmc = (pmc_state_t *)arg1; struct cpu_info *ci = curcpu(); + pmc_cpu_t *cpu = &pmc_cpu[cpu_index(ci)]; + uint64_t evtmsr, en; + + switch (pmc_type) { + case PMC_TYPE_I686: + en = PMC6_EVTSEL_EN; + break; - pmc_val_cpus[cpu_index(ci)].ctrval = + case PMC_TYPE_K7: + en = K7_EVTSEL_EN; + break; + + case PMC_TYPE_F10H: + en = F10H_EVTSEL_EN; + break; + } + + evtmsr = rdmsr(pmc->evtmsr); + + /* + * Quickly disable the counter, to avoid getting an NMI after setting + * ctrval. 
+ */ + wrmsr(pmc->evtmsr, evtmsr & ~en); + + cpu->val[pmc->n].ctrval = (rdmsr(pmc->ctrmsr) & pmc->ctrmask) - pmc->ctrinitval; + cpu->val[pmc->n].overfl = cpu->nmioverfl[pmc->n]; + + /* Re-enable the counter */ + wrmsr(pmc->evtmsr, evtmsr); } static void @@ -157,9 +214,14 @@ pmc_apply_cpu(void *arg1, void *arg2) pmc_state_t *pmc = (pmc_state_t *)arg1; bool start = (bool)arg2; struct cpu_info *ci = curcpu(); + pmc_cpu_t *cpu = &pmc_cpu[cpu_index(ci)]; if (start) { - pmc_lapic_image[cpu_index(ci)] = lapic_readreg(LAPIC_PCINT); + cpu->lapic_image = lapic_readreg(LAPIC_PCINT); + cpu->val[pmc->n].ctrval = 0; + cpu->val[pmc->n].overfl = 0; + cpu->nmioverfl[pmc->n] = 0; + lapic_writereg(LAPIC_PCINT, LAPIC_DLMODE_NMI); } @@ -172,11 +234,8 @@ pmc_apply_cpu(void *arg1, void *arg2) break; } - pmc_val_cpus[cpu_index(ci)].ctrval = 0; - pmc_val_cpus[cpu_index(ci)].overfl = 0; - if (!start) { - lapic_writereg(LAPIC_PCINT, pmc_lapic_image[cpu_index(ci)]); + lapic_writereg(LAPIC_PCINT, cpu->lapic_image); } } @@ -290,6 +349,7 @@ pmc_init(void) pmc_type = PMC_TYPE_I686; pmc_ncounters = 2; for (i = 0; i < pmc_ncounters; i++) { + pmc_state[i].n = i; pmc_state[i].evtmsr = MSR_EVNTSEL0 + i; pmc_state[i].ctrmsr = MSR_PERFCTR0 + i; pmc_state[i].ctrmaxval = (UINT64_C(1) << 40) - 1; @@ -300,6 +360,7 @@ pmc_init(void) pmc_type = PMC_TYPE_F10H; pmc_ncounters = 4; for (i = 0; i < pmc_ncounters; i++) { + pmc_state[i].n = i; pmc_state[i].evtmsr = MSR_F10H_EVNTSEL0 + i; pmc_state[i].ctrmsr = MSR_F10H_PERFCTR0 + i; pmc_state[i].ctrmaxval = @@ -311,6 +372,7 @@ pmc_init(void) pmc_type = PMC_TYPE_K7; pmc_ncounters = 4; for (i = 0; i < pmc_ncounters; i++) { + pmc_state[i].n = i; pmc_state[i].evtmsr = MSR_K7_EVNTSEL0 + i; pmc_state[i].ctrmsr = MSR_K7_PERFCTR0 + i; pmc_state[i].ctrmaxval = @@ -340,6 +402,7 @@ sys_pmc_info(struct lwp *l, struct x86_p rv.vers = PMC_VERSION; rv.type = pmc_type; rv.nctrs = pmc_ncounters; + rv.nsamp = NEVENTS_SAMPLE; return copyout(&rv, uargs, sizeof(rv)); } @@ -397,7 +460,8 
@@ sys_pmc_read(struct lwp *l, struct x86_p { struct x86_pmc_read_args args; pmc_state_t *pmc; - size_t nval; + pmc_cpu_t *cpu; + size_t i, nval; int error; error = kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_X86PMC, @@ -424,8 +488,16 @@ sys_pmc_read(struct lwp *l, struct x86_p if (pmc->running) { pmc_read(pmc); - error = copyout(&pmc_val_cpus, args.values, - nval * sizeof(x86_pmc_cpuval_t)); + + for (i = 0; i < nval; i++) { + cpu = &pmc_cpu[i]; + + error = copyout(&cpu->val[pmc->n], args.values + i, + sizeof(x86_pmc_cpuval_t)); + if (error) + break; + } + args.nval = nval; } else { error = ENOENT; Index: src/usr.bin/pmc/pmc.c diff -u src/usr.bin/pmc/pmc.c:1.24 src/usr.bin/pmc/pmc.c:1.25 --- src/usr.bin/pmc/pmc.c:1.24 Wed Jun 14 17:54:01 2017 +++ src/usr.bin/pmc/pmc.c Wed Jul 12 17:33:29 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: pmc.c,v 1.24 2017/06/14 17:54:01 maxv Exp $ */ +/* $NetBSD: pmc.c,v 1.25 2017/07/12 17:33:29 maxv Exp $ */ /* * Copyright (c) 2017 The NetBSD Foundation, Inc. @@ -66,7 +66,7 @@ #include <sys/cdefs.h> #ifndef lint -__RCSID("$NetBSD: pmc.c,v 1.24 2017/06/14 17:54:01 maxv Exp $"); +__RCSID("$NetBSD: pmc.c,v 1.25 2017/07/12 17:33:29 maxv Exp $"); #endif #include <inttypes.h> @@ -379,6 +379,7 @@ static int x86_pmc_startstop(x86_pmc_sta static int x86_pmc_read(x86_pmc_read_args_t *); static uint32_t pmc_ncounters; +static size_t pmc_nsamples; static struct cmdtab { const char *label; @@ -428,6 +429,7 @@ pmc_start(const pmc_name2val_cpu_t *pncp uint32_t flags; size_t n, i; + /* Get the source for each counter (kernel or userland) */ for (n = 0; n < pmc_ncounters; n++) { if (argv[n] == NULL) break; @@ -437,6 +439,7 @@ pmc_start(const pmc_name2val_cpu_t *pncp usage(); } + /* Initialize each pmcarg structure */ for (i = 0; i < n; i++) { pmcarg = &pmcargs[i]; event = tokens[i][0]; @@ -460,6 +463,7 @@ pmc_start(const pmc_name2val_cpu_t *pncp pmcarg->flags = flags; } + /* Finally, start each counter */ for (i = 0; i < n; i++) { pmcarg = &pmcargs[i]; if 
(x86_pmc_startstop(pmcarg) < 0) @@ -475,6 +479,7 @@ pmc_stop(const pmc_name2val_cpu_t *pncp, x86_pmc_startstop_args_t pmcstop; x86_pmc_read_args_t pmcread; size_t i, j, n, nval = 0; + uint64_t val; /* Read the values. */ for (n = 0; n < pmc_ncounters; n++) { @@ -510,7 +515,9 @@ pmc_stop(const pmc_name2val_cpu_t *pncp, for (i = 0; i < n; i++) { printf("%zu\t\t", i); for (j = 0; j < nval; j++) { - printf("%" PRIu64 "\t\t", cpuval[i][j].ctrval); + val = cpuval[i][j].overfl * pmc_nsamples + + cpuval[i][j].ctrval; + printf("%" PRIu64 "\t\t", val); } printf("\n"); } @@ -599,6 +606,7 @@ main(int argc, char **argv) if (pmcinfo.vers != 1) errx(EXIT_FAILURE, "Wrong PMC version"); pmc_ncounters = pmcinfo.nctrs; + pmc_nsamples = pmcinfo.nsamp; pncp = pmc_lookup_cpu(pmcinfo.type); if (pncp == NULL)