Module Name:    src
Committed By:   maxv
Date:           Wed Jul 12 17:33:29 UTC 2017

Modified Files:
        src/sys/arch/x86/include: sysarch.h
        src/sys/arch/x86/x86: pmc.c
        src/usr.bin/pmc: pmc.c

Log Message:
Properly handle overflows, and take them into account in userland.


To generate a diff of this commit:
cvs rdiff -u -r1.11 -r1.12 src/sys/arch/x86/include/sysarch.h
cvs rdiff -u -r1.9 -r1.10 src/sys/arch/x86/x86/pmc.c
cvs rdiff -u -r1.24 -r1.25 src/usr.bin/pmc/pmc.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/x86/include/sysarch.h
diff -u src/sys/arch/x86/include/sysarch.h:1.11 src/sys/arch/x86/include/sysarch.h:1.12
--- src/sys/arch/x86/include/sysarch.h:1.11	Fri Mar 10 13:09:11 2017
+++ src/sys/arch/x86/include/sysarch.h	Wed Jul 12 17:33:29 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: sysarch.h,v 1.11 2017/03/10 13:09:11 maxv Exp $	*/
+/*	$NetBSD: sysarch.h,v 1.12 2017/07/12 17:33:29 maxv Exp $	*/
 
 /*-
  * Copyright (c) 2007 The NetBSD Foundation, Inc.
@@ -134,6 +134,7 @@ struct _X86_SYSARCH_L(pmc_info_args) {
 	int vers;
 	int type;
 	uint32_t nctrs;
+	uint64_t nsamp;
 };
 
 #define	PMC_VERSION		1

Index: src/sys/arch/x86/x86/pmc.c
diff -u src/sys/arch/x86/x86/pmc.c:1.9 src/sys/arch/x86/x86/pmc.c:1.10
--- src/sys/arch/x86/x86/pmc.c:1.9	Wed Jul 12 16:59:41 2017
+++ src/sys/arch/x86/x86/pmc.c	Wed Jul 12 17:33:29 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmc.c,v 1.9 2017/07/12 16:59:41 maxv Exp $	*/
+/*	$NetBSD: pmc.c,v 1.10 2017/07/12 17:33:29 maxv Exp $	*/
 
 /*
  * Copyright (c) 2017 The NetBSD Foundation, Inc.
@@ -63,11 +63,17 @@
  */
 
 /*
- * Interface to x86 CPU Performance Counters.
+ * Interface to x86 CPU Performance Counters. System-wide only, for now.
+ *
+ * For each PMC on each CPU, two pieces of information are returned to userland:
+ * the number of overflows, and the current value of the PMC. It means that the
+ * total number of events for the given PMC on the given CPU is computable the
+ * following way:
+ *     tot_n_events = NEVENTS_SAMPLE * overfl + ctrval
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmc.c,v 1.9 2017/07/12 16:59:41 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmc.c,v 1.10 2017/07/12 17:33:29 maxv Exp $");
 
 #include "opt_pmc.h"
 
@@ -83,7 +89,6 @@ __KERNEL_RCSID(0, "$NetBSD: pmc.c,v 1.9 
 #include <machine/specialreg.h>
 #include <machine/sysarch.h>
 #include <machine/pmc.h>
-#include <machine/cpu_counter.h>
 #include <machine/cputypes.h>
 #include <machine/i82489reg.h>
 #include <machine/i82489var.h>
@@ -92,8 +97,12 @@ __KERNEL_RCSID(0, "$NetBSD: pmc.c,v 1.9 
 
 #define NEVENTS_SAMPLE	500000
 
+/*
+ * Structure describing a PMC.
+ */
 typedef struct {
 	bool running;
+	size_t n;		/* pmc number */
 	uint32_t evtmsr;	/* event selector MSR */
 	uint64_t evtval;	/* event selector value */
 	uint32_t ctrmsr;	/* counter MSR */
@@ -102,21 +111,36 @@ typedef struct {
 	uint64_t ctrmask;
 } pmc_state_t;
 
-static nmi_handler_t *pmc_nmi_handle;
-static uint32_t pmc_lapic_image[MAXCPUS];
+/*
+ * Per-CPU structure that describes the values of each PMC, plus the state
+ * of the LAPIC before enabling PMCs.
+ */
+typedef struct {
+	x86_pmc_cpuval_t val[PMC_NCOUNTERS];	/* values returned to user */ 
+	uint64_t nmioverfl[PMC_NCOUNTERS];	/* incremented by NMI intr */
+	uint32_t lapic_image;			/* saved content of LAPIC */
+} pmc_cpu_t;
 
-static x86_pmc_cpuval_t pmc_val_cpus[MAXCPUS] __aligned(CACHE_LINE_SIZE);
+static pmc_state_t pmc_state[PMC_NCOUNTERS];
+static pmc_cpu_t pmc_cpu[MAXCPUS];
+
+static nmi_handler_t *pmc_nmi_handle;
 static kmutex_t pmc_lock;
 
-static pmc_state_t pmc_state[PMC_NCOUNTERS];
 static uint32_t pmc_ncounters __read_mostly;
 static int pmc_type __read_mostly;
 
+/*
+ * Handle PMC overflows. Called from NMI interrupt context, with interrupts
+ * disabled.
+ */
 static int
 pmc_nmi(const struct trapframe *tf, void *dummy)
 {
 	struct cpu_info *ci = curcpu();
 	pmc_state_t *pmc;
+	pmc_cpu_t *cpu;
+	uint64_t ctr;
 	size_t i;
 
 	if (pmc_type == PMC_TYPE_NONE) {
@@ -127,7 +151,11 @@ pmc_nmi(const struct trapframe *tf, void
 		if (!pmc->running) {
 			continue;
 		}
-		/* XXX make sure it really comes from this PMC */
+		ctr = rdmsr(pmc->ctrmsr);
+		/* If the highest bit is zero, then it's this PMC */
+		if ((ctr & ((pmc->ctrmask + 1) >> 1)) != 0) {
+			continue;
+		}
 		break;
 	}
 	if (i == pmc_ncounters) {
@@ -135,7 +163,8 @@ pmc_nmi(const struct trapframe *tf, void
 	}
 
 	/* Count the overflow, and restart the counter */
-	pmc_val_cpus[cpu_index(ci)].overfl++;
+	cpu = &pmc_cpu[cpu_index(ci)];
+	cpu->nmioverfl[i]++;
 	wrmsr(pmc->ctrmsr, pmc->ctrinitval);
 
 	return 1;
@@ -146,9 +175,37 @@ pmc_read_cpu(void *arg1, void *arg2)
 {
 	pmc_state_t *pmc = (pmc_state_t *)arg1;
 	struct cpu_info *ci = curcpu();
+	pmc_cpu_t *cpu = &pmc_cpu[cpu_index(ci)];
+	uint64_t evtmsr, en;
+
+	switch (pmc_type) {
+	case PMC_TYPE_I686:
+		en = PMC6_EVTSEL_EN;
+		break;
 
-	pmc_val_cpus[cpu_index(ci)].ctrval =
+	case PMC_TYPE_K7:
+		en = K7_EVTSEL_EN;
+		break;
+
+	case PMC_TYPE_F10H:
+		en = F10H_EVTSEL_EN;
+		break;
+	}
+
+	evtmsr = rdmsr(pmc->evtmsr);
+
+	/*
+	 * Quickly disable the counter, to avoid getting an NMI after setting
+	 * ctrval.
+	 */
+	wrmsr(pmc->evtmsr, evtmsr & ~en);
+
+	cpu->val[pmc->n].ctrval =
 	    (rdmsr(pmc->ctrmsr) & pmc->ctrmask) - pmc->ctrinitval;
+	cpu->val[pmc->n].overfl = cpu->nmioverfl[pmc->n];
+
+	/* Re-enable the counter */
+	wrmsr(pmc->evtmsr, evtmsr);
 }
 
 static void
@@ -157,9 +214,14 @@ pmc_apply_cpu(void *arg1, void *arg2)
 	pmc_state_t *pmc = (pmc_state_t *)arg1;
 	bool start = (bool)arg2;
 	struct cpu_info *ci = curcpu();
+	pmc_cpu_t *cpu = &pmc_cpu[cpu_index(ci)];
 
 	if (start) {
-		pmc_lapic_image[cpu_index(ci)] = lapic_readreg(LAPIC_PCINT);
+		cpu->lapic_image = lapic_readreg(LAPIC_PCINT);
+		cpu->val[pmc->n].ctrval = 0;
+		cpu->val[pmc->n].overfl = 0;
+		cpu->nmioverfl[pmc->n] = 0;
+
 		lapic_writereg(LAPIC_PCINT, LAPIC_DLMODE_NMI);
 	}
 
@@ -172,11 +234,8 @@ pmc_apply_cpu(void *arg1, void *arg2)
 		break;
 	}
 
-	pmc_val_cpus[cpu_index(ci)].ctrval = 0;
-	pmc_val_cpus[cpu_index(ci)].overfl = 0;
-
 	if (!start) {
-		lapic_writereg(LAPIC_PCINT, pmc_lapic_image[cpu_index(ci)]);
+		lapic_writereg(LAPIC_PCINT, cpu->lapic_image);
 	}
 }
 
@@ -290,6 +349,7 @@ pmc_init(void)
 		pmc_type = PMC_TYPE_I686;
 		pmc_ncounters = 2;
 		for (i = 0; i < pmc_ncounters; i++) {
+			pmc_state[i].n = i;
 			pmc_state[i].evtmsr = MSR_EVNTSEL0 + i;
 			pmc_state[i].ctrmsr = MSR_PERFCTR0 + i;
 			pmc_state[i].ctrmaxval = (UINT64_C(1) << 40) - 1;
@@ -300,6 +360,7 @@ pmc_init(void)
 			pmc_type = PMC_TYPE_F10H;
 			pmc_ncounters = 4;
 			for (i = 0; i < pmc_ncounters; i++) {
+				pmc_state[i].n = i;
 				pmc_state[i].evtmsr = MSR_F10H_EVNTSEL0 + i;
 				pmc_state[i].ctrmsr = MSR_F10H_PERFCTR0 + i;
 				pmc_state[i].ctrmaxval =
@@ -311,6 +372,7 @@ pmc_init(void)
 			pmc_type = PMC_TYPE_K7;
 			pmc_ncounters = 4;
 			for (i = 0; i < pmc_ncounters; i++) {
+				pmc_state[i].n = i;
 				pmc_state[i].evtmsr = MSR_K7_EVNTSEL0 + i;
 				pmc_state[i].ctrmsr = MSR_K7_PERFCTR0 + i;
 				pmc_state[i].ctrmaxval =
@@ -340,6 +402,7 @@ sys_pmc_info(struct lwp *l, struct x86_p
 	rv.vers = PMC_VERSION;
 	rv.type = pmc_type;
 	rv.nctrs = pmc_ncounters;
+	rv.nsamp = NEVENTS_SAMPLE;
 
 	return copyout(&rv, uargs, sizeof(rv));
 }
@@ -397,7 +460,8 @@ sys_pmc_read(struct lwp *l, struct x86_p
 {
 	struct x86_pmc_read_args args;
 	pmc_state_t *pmc;
-	size_t nval;
+	pmc_cpu_t *cpu;
+	size_t i, nval;
 	int error;
 
 	error = kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_X86PMC,
@@ -424,8 +488,16 @@ sys_pmc_read(struct lwp *l, struct x86_p
 
 	if (pmc->running) {
 		pmc_read(pmc);
-		error = copyout(&pmc_val_cpus, args.values,
-		    nval * sizeof(x86_pmc_cpuval_t));
+
+		for (i = 0; i < nval; i++) {
+			cpu = &pmc_cpu[i];
+
+			error = copyout(&cpu->val[pmc->n], args.values + i,
+			    sizeof(x86_pmc_cpuval_t));
+			if (error)
+				break;
+		}
+
 		args.nval = nval;
 	} else {
 		error = ENOENT;

Index: src/usr.bin/pmc/pmc.c
diff -u src/usr.bin/pmc/pmc.c:1.24 src/usr.bin/pmc/pmc.c:1.25
--- src/usr.bin/pmc/pmc.c:1.24	Wed Jun 14 17:54:01 2017
+++ src/usr.bin/pmc/pmc.c	Wed Jul 12 17:33:29 2017
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmc.c,v 1.24 2017/06/14 17:54:01 maxv Exp $	*/
+/*	$NetBSD: pmc.c,v 1.25 2017/07/12 17:33:29 maxv Exp $	*/
 
 /*
  * Copyright (c) 2017 The NetBSD Foundation, Inc.
@@ -66,7 +66,7 @@
 #include <sys/cdefs.h>
 
 #ifndef lint
-__RCSID("$NetBSD: pmc.c,v 1.24 2017/06/14 17:54:01 maxv Exp $");
+__RCSID("$NetBSD: pmc.c,v 1.25 2017/07/12 17:33:29 maxv Exp $");
 #endif
 
 #include <inttypes.h>
@@ -379,6 +379,7 @@ static int x86_pmc_startstop(x86_pmc_sta
 static int x86_pmc_read(x86_pmc_read_args_t *);
 
 static uint32_t pmc_ncounters;
+static size_t pmc_nsamples;
 
 static struct cmdtab {
 	const char *label;
@@ -428,6 +429,7 @@ pmc_start(const pmc_name2val_cpu_t *pncp
 	uint32_t flags;
 	size_t n, i;
 
+	/* Get the source for each counter (kernel or userland) */
 	for (n = 0; n < pmc_ncounters; n++) {
 		if (argv[n] == NULL)
 			break;
@@ -437,6 +439,7 @@ pmc_start(const pmc_name2val_cpu_t *pncp
 			usage();
 	}
 
+	/* Initialize each pmcarg structure */
 	for (i = 0; i < n; i++) {
 		pmcarg = &pmcargs[i];
 		event = tokens[i][0];
@@ -460,6 +463,7 @@ pmc_start(const pmc_name2val_cpu_t *pncp
 		pmcarg->flags = flags;
 	}
 
+	/* Finally, start each counter */
 	for (i = 0; i < n; i++) {
 		pmcarg = &pmcargs[i];
 		if (x86_pmc_startstop(pmcarg) < 0)
@@ -475,6 +479,7 @@ pmc_stop(const pmc_name2val_cpu_t *pncp,
 	x86_pmc_startstop_args_t pmcstop;
 	x86_pmc_read_args_t pmcread;
 	size_t i, j, n, nval = 0;
+	uint64_t val;
 
 	/* Read the values. */
 	for (n = 0; n < pmc_ncounters; n++) {
@@ -510,7 +515,9 @@ pmc_stop(const pmc_name2val_cpu_t *pncp,
 	for (i = 0; i < n; i++) {
 		printf("%zu\t\t", i);
 		for (j = 0; j < nval; j++) {
-			printf("%" PRIu64 "\t\t", cpuval[i][j].ctrval);
+			val = cpuval[i][j].overfl * pmc_nsamples +
+			    cpuval[i][j].ctrval;
+			printf("%" PRIu64 "\t\t", val);
 		}
 		printf("\n");
 	}
@@ -599,6 +606,7 @@ main(int argc, char **argv)
 	if (pmcinfo.vers != 1)
 		errx(EXIT_FAILURE, "Wrong PMC version");
 	pmc_ncounters = pmcinfo.nctrs;
+	pmc_nsamples = pmcinfo.nsamp;
 
 	pncp = pmc_lookup_cpu(pmcinfo.type);
 	if (pncp == NULL)

Reply via email to