Module Name: src Committed By: ryo Date: Thu Dec 1 00:32:52 UTC 2022
Modified Files: src/sys/arch/aarch64/include: armreg.h src/sys/dev/tprof: tprof.c tprof.h tprof_armv7.c tprof_armv8.c tprof_ioctl.h tprof_types.h tprof_x86.c tprof_x86_amd.c tprof_x86_intel.c src/usr.sbin/tprof: tprof.8 tprof.c tprof_analyze.c Log Message: Improve tprof(4) - Multiple events can now be handled simultaneously. - Counters should be configured with TPROF_IOC_CONFIGURE_EVENT in advance, instead of being configured at TPROF_IOC_START. - The configured counters can be started and stopped repeatedly by TPROF_IOC_START/TPROF_IOC_STOP. - The value of the performance counter can be obtained at any timing as a 64bit value with TPROF_IOC_GETCOUNTS. - Backend common parts are handled in tprof.c as much as possible, and functions on the tprof_backend side have been reimplemented to be more primitive. - The reset value of counter overflows for profiling can now be adjusted. It is calculated by default from the CPU clock (speed of cycle counter) and TPROF_HZ, but for some events the value may be too large to be sufficient for profiling. The event counter can be specified as a ratio to the default or as an absolute value when configuring the event counter. - Due to overall changes, API and ABI have been changed. TPROF_VERSION and TPROF_BACKEND_VERSION were updated. 
To generate a diff of this commit: cvs rdiff -u -r1.62 -r1.63 src/sys/arch/aarch64/include/armreg.h cvs rdiff -u -r1.18 -r1.19 src/sys/dev/tprof/tprof.c cvs rdiff -u -r1.6 -r1.7 src/sys/dev/tprof/tprof.h cvs rdiff -u -r1.9 -r1.10 src/sys/dev/tprof/tprof_armv7.c cvs rdiff -u -r1.17 -r1.18 src/sys/dev/tprof/tprof_armv8.c cvs rdiff -u -r1.4 -r1.5 src/sys/dev/tprof/tprof_ioctl.h \ src/sys/dev/tprof/tprof_x86_intel.c cvs rdiff -u -r1.5 -r1.6 src/sys/dev/tprof/tprof_types.h \ src/sys/dev/tprof/tprof_x86_amd.c cvs rdiff -u -r1.1 -r1.2 src/sys/dev/tprof/tprof_x86.c cvs rdiff -u -r1.16 -r1.17 src/usr.sbin/tprof/tprof.8 cvs rdiff -u -r1.13 -r1.14 src/usr.sbin/tprof/tprof.c cvs rdiff -u -r1.5 -r1.6 src/usr.sbin/tprof/tprof_analyze.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/arch/aarch64/include/armreg.h diff -u src/sys/arch/aarch64/include/armreg.h:1.62 src/sys/arch/aarch64/include/armreg.h:1.63 --- src/sys/arch/aarch64/include/armreg.h:1.62 Thu Dec 1 00:29:10 2022 +++ src/sys/arch/aarch64/include/armreg.h Thu Dec 1 00:32:52 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: armreg.h,v 1.62 2022/12/01 00:29:10 ryo Exp $ */ +/* $NetBSD: armreg.h,v 1.63 2022/12/01 00:32:52 ryo Exp $ */ /*- * Copyright (c) 2014 The NetBSD Foundation, Inc. @@ -248,6 +248,10 @@ AARCH64REG_READ_INLINE(id_aa64dfr0_el1) #define ID_AA64DFR0_EL1_PMUVER_NONE 0 #define ID_AA64DFR0_EL1_PMUVER_V3 1 #define ID_AA64DFR0_EL1_PMUVER_NOV3 2 +#define ID_AA64DFR0_EL1_PMUVER_V3P1 4 +#define ID_AA64DFR0_EL1_PMUVER_V3P4 5 +#define ID_AA64DFR0_EL1_PMUVER_V3P5 6 +#define ID_AA64DFR0_EL1_PMUVER_V3P7 7 #define ID_AA64DFR0_EL1_PMUVER_IMPL 15 #define ID_AA64DFR0_EL1_TRACEVER __BITS(4,7) #define ID_AA64DFR0_EL1_TRACEVER_NONE 0 @@ -1221,6 +1225,7 @@ AARCH64REG_WRITE_INLINE(pmcr_el0) #define PMCR_IMP __BITS(31,24) // Implementor code #define PMCR_IDCODE __BITS(23,16) // Identification code #define PMCR_N __BITS(15,11) // Number of event counters +#define PMCR_LP __BIT(7) // Long event counter enable #define PMCR_LC __BIT(6) // Long cycle counter enable #define PMCR_DP __BIT(5) // Disable cycle counter when event // counting is prohibited Index: src/sys/dev/tprof/tprof.c diff -u src/sys/dev/tprof/tprof.c:1.18 src/sys/dev/tprof/tprof.c:1.19 --- src/sys/dev/tprof/tprof.c:1.18 Thu Dec 1 00:27:59 2022 +++ src/sys/dev/tprof/tprof.c Thu Dec 1 00:32:52 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: tprof.c,v 1.18 2022/12/01 00:27:59 ryo Exp $ */ +/* $NetBSD: tprof.c,v 1.19 2022/12/01 00:32:52 ryo Exp $ */ /*- * Copyright (c)2008,2009,2010 YAMAMOTO Takashi, @@ -27,7 +27,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.18 2022/12/01 00:27:59 ryo Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.19 2022/12/01 00:32:52 ryo Exp $"); #include <sys/param.h> #include 
<sys/systm.h> @@ -42,12 +42,17 @@ __KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1. #include <sys/proc.h> #include <sys/queue.h> #include <sys/workqueue.h> +#include <sys/xcall.h> #include <dev/tprof/tprof.h> #include <dev/tprof/tprof_ioctl.h> #include "ioconf.h" +#ifndef TPROF_HZ +#define TPROF_HZ 10000 +#endif + /* * locking order: * tprof_reader_lock -> tprof_lock @@ -73,7 +78,7 @@ typedef struct tprof_buf { } tprof_buf_t; #define TPROF_BUF_BYTESIZE(sz) \ (sizeof(tprof_buf_t) + (sz) * sizeof(tprof_sample_t)) -#define TPROF_MAX_SAMPLES_PER_BUF 10000 +#define TPROF_MAX_SAMPLES_PER_BUF (TPROF_HZ * 2) #define TPROF_MAX_BUF 100 @@ -85,14 +90,20 @@ typedef struct { } __aligned(CACHE_LINE_SIZE) tprof_cpu_t; typedef struct tprof_backend { + /* + * tprof_backend_softc_t must be passed as an argument to the interrupt + * handler, but since this is difficult to implement in armv7/v8. Then, + * tprof_backend is exposed. Additionally, softc must be placed at the + * beginning of struct tprof_backend. + */ + tprof_backend_softc_t tb_softc; + const char *tb_name; const tprof_backend_ops_t *tb_ops; LIST_ENTRY(tprof_backend) tb_list; - int tb_usecount; /* S: */ } tprof_backend_t; static kmutex_t tprof_lock; -static bool tprof_running; /* s: */ static u_int tprof_nworker; /* L: # of running worker LWPs */ static lwp_t *tprof_owner; static STAILQ_HEAD(, tprof_buf) tprof_list; /* L: global buffer list */ @@ -101,7 +112,7 @@ static struct workqueue *tprof_wq; static struct percpu *tprof_cpus __read_mostly; /* tprof_cpu_t * */ static u_int tprof_samples_per_buf; -static tprof_backend_t *tprof_backend; /* S: */ +tprof_backend_t *tprof_backend; /* S: */ static LIST_HEAD(, tprof_backend) tprof_backends = LIST_HEAD_INITIALIZER(tprof_backend); /* S: */ @@ -193,6 +204,7 @@ tprof_worker(struct work *wk, void *dumm { tprof_cpu_t * const c = tprof_curcpu(); tprof_buf_t *buf; + tprof_backend_t *tb; bool shouldstop; KASSERT(wk == &c->c_work); @@ -207,7 +219,8 @@ tprof_worker(struct work *wk, void *dumm 
* and put it on the global list for read(2). */ mutex_enter(&tprof_lock); - shouldstop = !tprof_running; + tb = tprof_backend; + shouldstop = (tb == NULL || tb->tb_softc.sc_ctr_running_mask == 0); if (shouldstop) { KASSERT(tprof_nworker > 0); tprof_nworker--; @@ -283,103 +296,352 @@ tprof_getinfo(struct tprof_info *info) } static int -tprof_start(const tprof_param_t *param) +tprof_getncounters(u_int *ncounters) +{ + tprof_backend_t *tb; + + tb = tprof_backend; + if (tb == NULL) + return ENOENT; + + *ncounters = tb->tb_ops->tbo_ncounters(); + return 0; +} + +static void +tprof_start_cpu(void *arg1, void *arg2) +{ + tprof_backend_t *tb = arg1; + tprof_countermask_t runmask = (uintptr_t)arg2; + + tb->tb_ops->tbo_start(runmask); +} + +static void +tprof_stop_cpu(void *arg1, void *arg2) +{ + tprof_backend_t *tb = arg1; + tprof_countermask_t stopmask = (uintptr_t)arg2; + + tb->tb_ops->tbo_stop(stopmask); +} + +static int +tprof_start(tprof_countermask_t runmask) { CPU_INFO_ITERATOR cii; struct cpu_info *ci; - int error; - uint64_t freq; tprof_backend_t *tb; + uint64_t xc; + int error; + bool firstrun; KASSERT(mutex_owned(&tprof_startstop_lock)); - if (tprof_running) { - error = EBUSY; - goto done; - } tb = tprof_backend; if (tb == NULL) { error = ENOENT; goto done; } - if (tb->tb_usecount > 0) { - error = EBUSY; + + runmask &= ~tb->tb_softc.sc_ctr_running_mask; + runmask &= tb->tb_softc.sc_ctr_configured_mask; + if (runmask == 0) { + /* + * targets are already running. + * unconfigured counters are ignored. 
+ */ + error = 0; goto done; } - tb->tb_usecount++; - freq = tb->tb_ops->tbo_estimate_freq(); - tprof_samples_per_buf = MIN(freq * 2, TPROF_MAX_SAMPLES_PER_BUF); - - error = workqueue_create(&tprof_wq, "tprofmv", tprof_worker, NULL, - PRI_NONE, IPL_SOFTCLOCK, WQ_MPSAFE | WQ_PERCPU); - if (error != 0) { - goto done; + firstrun = (tb->tb_softc.sc_ctr_running_mask == 0); + if (firstrun) { + if (tb->tb_ops->tbo_establish != NULL) { + error = tb->tb_ops->tbo_establish(&tb->tb_softc); + if (error != 0) + goto done; + } + + tprof_samples_per_buf = TPROF_MAX_SAMPLES_PER_BUF; + error = workqueue_create(&tprof_wq, "tprofmv", tprof_worker, + NULL, PRI_NONE, IPL_SOFTCLOCK, WQ_MPSAFE | WQ_PERCPU); + if (error != 0) { + if (tb->tb_ops->tbo_disestablish != NULL) + tb->tb_ops->tbo_disestablish(&tb->tb_softc); + goto done; + } + + for (CPU_INFO_FOREACH(cii, ci)) { + tprof_cpu_t * const c = tprof_cpu(ci); + tprof_buf_t *new; + tprof_buf_t *old; + + new = tprof_buf_alloc(); + old = tprof_buf_switch(c, new); + if (old != NULL) { + tprof_buf_free(old); + } + callout_init(&c->c_callout, CALLOUT_MPSAFE); + callout_setfunc(&c->c_callout, tprof_kick, ci); + } } - for (CPU_INFO_FOREACH(cii, ci)) { - tprof_cpu_t * const c = tprof_cpu(ci); - tprof_buf_t *new; - tprof_buf_t *old; + runmask &= tb->tb_softc.sc_ctr_configured_mask; + xc = xc_broadcast(0, tprof_start_cpu, tb, (void *)(uintptr_t)runmask); + xc_wait(xc); + mutex_enter(&tprof_lock); + tb->tb_softc.sc_ctr_running_mask |= runmask; + mutex_exit(&tprof_lock); - new = tprof_buf_alloc(); - old = tprof_buf_switch(c, new); - if (old != NULL) { - tprof_buf_free(old); + if (firstrun) { + for (CPU_INFO_FOREACH(cii, ci)) { + tprof_cpu_t * const c = tprof_cpu(ci); + + mutex_enter(&tprof_lock); + tprof_nworker++; + mutex_exit(&tprof_lock); + workqueue_enqueue(tprof_wq, &c->c_work, ci); } - callout_init(&c->c_callout, CALLOUT_MPSAFE); - callout_setfunc(&c->c_callout, tprof_kick, ci); } +done: + return error; +} - error = 
tb->tb_ops->tbo_start(param); - if (error != 0) { - KASSERT(tb->tb_usecount > 0); - tb->tb_usecount--; - tprof_stop1(); +static void +tprof_stop(tprof_countermask_t stopmask) +{ + tprof_backend_t *tb; + uint64_t xc; + + tb = tprof_backend; + if (tb == NULL) + return; + + KASSERT(mutex_owned(&tprof_startstop_lock)); + stopmask &= tb->tb_softc.sc_ctr_running_mask; + if (stopmask == 0) { + /* targets are not running */ goto done; } + xc = xc_broadcast(0, tprof_stop_cpu, tb, (void *)(uintptr_t)stopmask); + xc_wait(xc); mutex_enter(&tprof_lock); - tprof_running = true; + tb->tb_softc.sc_ctr_running_mask &= ~stopmask; mutex_exit(&tprof_lock); - for (CPU_INFO_FOREACH(cii, ci)) { - tprof_cpu_t * const c = tprof_cpu(ci); + /* all counters have stopped? */ + if (tb->tb_softc.sc_ctr_running_mask == 0) { mutex_enter(&tprof_lock); - tprof_nworker++; + cv_broadcast(&tprof_reader_cv); + while (tprof_nworker > 0) { + cv_wait(&tprof_cv, &tprof_lock); + } mutex_exit(&tprof_lock); - workqueue_enqueue(tprof_wq, &c->c_work, ci); + + tprof_stop1(); + if (tb->tb_ops->tbo_disestablish != NULL) + tb->tb_ops->tbo_disestablish(&tb->tb_softc); } done: - return error; + ; } static void -tprof_stop(void) +tprof_init_percpu_counters_offset(void *vp, void *vp2, struct cpu_info *ci) +{ + uint64_t *counters_offset = vp; + u_int counter = (uintptr_t)vp2; + + tprof_backend_t *tb = tprof_backend; + tprof_param_t *param = &tb->tb_softc.sc_count[counter].ctr_param; + counters_offset[counter] = param->p_value; +} + +static void +tprof_configure_event_cpu(void *arg1, void *arg2) +{ + tprof_backend_t *tb = arg1; + u_int counter = (uintptr_t)arg2; + tprof_param_t *param = &tb->tb_softc.sc_count[counter].ctr_param; + + tb->tb_ops->tbo_configure_event(counter, param); +} + +static int +tprof_configure_event(const tprof_param_t *param) { tprof_backend_t *tb; + tprof_backend_softc_t *sc; + tprof_param_t *sc_param; + uint64_t xc; + int c, error; - KASSERT(mutex_owned(&tprof_startstop_lock)); - if 
(!tprof_running) { + if ((param->p_flags & (TPROF_PARAM_USER | TPROF_PARAM_KERN)) == 0) { + error = EINVAL; goto done; } tb = tprof_backend; - KASSERT(tb->tb_usecount > 0); - tb->tb_ops->tbo_stop(NULL); - tb->tb_usecount--; + if (tb == NULL) { + error = ENOENT; + goto done; + } + sc = &tb->tb_softc; - mutex_enter(&tprof_lock); - tprof_running = false; - cv_broadcast(&tprof_reader_cv); - while (tprof_nworker > 0) { - cv_wait(&tprof_cv, &tprof_lock); + c = param->p_counter; + if (c >= tb->tb_softc.sc_ncounters) { + error = EINVAL; + goto done; + } + + if (tb->tb_ops->tbo_valid_event != NULL) { + error = tb->tb_ops->tbo_valid_event(param->p_counter, param); + if (error != 0) + goto done; + } + + /* if already running, stop the counter */ + if (ISSET(c, tb->tb_softc.sc_ctr_running_mask)) + tprof_stop(__BIT(c)); + + sc->sc_count[c].ctr_bitwidth = + tb->tb_ops->tbo_counter_bitwidth(param->p_counter); + + sc_param = &sc->sc_count[c].ctr_param; + memcpy(sc_param, param, sizeof(*sc_param)); /* save copy of param */ + + if (ISSET(param->p_flags, TPROF_PARAM_PROFILE)) { + uint64_t freq, inum, dnum; + + freq = tb->tb_ops->tbo_counter_estimate_freq(c); + sc->sc_count[c].ctr_counter_val = freq / TPROF_HZ; + if (sc->sc_count[c].ctr_counter_val == 0) { + printf("%s: counter#%d frequency (%"PRIu64") is" + " very low relative to TPROF_HZ (%u)\n", __func__, + c, freq, TPROF_HZ); + sc->sc_count[c].ctr_counter_val = + 4000000000ULL / TPROF_HZ; + } + + switch (param->p_flags & TPROF_PARAM_VALUE2_MASK) { + case TPROF_PARAM_VALUE2_SCALE: + if (sc_param->p_value2 == 0) + break; + /* + * p_value2 is 64-bit fixed-point + * upper 32 bits are the integer part + * lower 32 bits are the decimal part + */ + inum = sc_param->p_value2 >> 32; + dnum = sc_param->p_value2 & __BITS(31, 0); + sc->sc_count[c].ctr_counter_val = + sc->sc_count[c].ctr_counter_val * inum + + (sc->sc_count[c].ctr_counter_val * dnum >> 32); + if (sc->sc_count[c].ctr_counter_val == 0) + sc->sc_count[c].ctr_counter_val = 1; + 
break; + case TPROF_PARAM_VALUE2_TRIGGERCOUNT: + if (sc_param->p_value2 == 0) + sc_param->p_value2 = 1; + if (sc_param->p_value2 > + __BITS(sc->sc_count[c].ctr_bitwidth - 1, 0)) { + sc_param->p_value2 = + __BITS(sc->sc_count[c].ctr_bitwidth - 1, 0); + } + sc->sc_count[c].ctr_counter_val = sc_param->p_value2; + break; + default: + break; + } + sc->sc_count[c].ctr_counter_reset_val = + -sc->sc_count[c].ctr_counter_val; + sc->sc_count[c].ctr_counter_reset_val &= + __BITS(sc->sc_count[c].ctr_bitwidth - 1, 0); + } else { + sc->sc_count[c].ctr_counter_val = 0; + sc->sc_count[c].ctr_counter_reset_val = 0; } + + /* At this point, p_value is used as an initial value */ + percpu_foreach(tb->tb_softc.sc_ctr_offset_percpu, + tprof_init_percpu_counters_offset, (void *)(uintptr_t)c); + /* On the backend side, p_value is used as the reset value */ + sc_param->p_value = tb->tb_softc.sc_count[c].ctr_counter_reset_val; + + xc = xc_broadcast(0, tprof_configure_event_cpu, + tb, (void *)(uintptr_t)c); + xc_wait(xc); + + mutex_enter(&tprof_lock); + /* update counters bitmasks */ + SET(tb->tb_softc.sc_ctr_configured_mask, __BIT(c)); + CLR(tb->tb_softc.sc_ctr_prof_mask, __BIT(c)); + CLR(tb->tb_softc.sc_ctr_ovf_mask, __BIT(c)); + /* profiled counter requires overflow handling */ + if (ISSET(param->p_flags, TPROF_PARAM_PROFILE)) { + SET(tb->tb_softc.sc_ctr_prof_mask, __BIT(c)); + SET(tb->tb_softc.sc_ctr_ovf_mask, __BIT(c)); + } + /* counters with less than 64bits also require overflow handling */ + if (sc->sc_count[c].ctr_bitwidth != 64) + SET(tb->tb_softc.sc_ctr_ovf_mask, __BIT(c)); mutex_exit(&tprof_lock); - tprof_stop1(); -done: - ; + error = 0; + + done: + return error; +} + +static void +tprof_getcounts_cpu(void *arg1, void *arg2) +{ + tprof_backend_t *tb = arg1; + tprof_backend_softc_t *sc = &tb->tb_softc; + uint64_t *counters = arg2; + uint64_t *counters_offset; + unsigned int c; + + tprof_countermask_t configmask = sc->sc_ctr_configured_mask; + counters_offset = 
percpu_getref(sc->sc_ctr_offset_percpu); + for (c = 0; c < sc->sc_ncounters; c++) { + if (ISSET(configmask, __BIT(c))) { + uint64_t ctr = tb->tb_ops->tbo_counter_read(c); + counters[c] = counters_offset[c] + + ((ctr - sc->sc_count[c].ctr_counter_reset_val) & + __BITS(sc->sc_count[c].ctr_bitwidth - 1, 0)); + } else { + counters[c] = 0; + } + } + percpu_putref(sc->sc_ctr_offset_percpu); +} + +static int +tprof_getcounts(tprof_counts_t *counts) +{ + struct cpu_info *ci; + tprof_backend_t *tb; + uint64_t xc; + + tb = tprof_backend; + if (tb == NULL) + return ENOENT; + + if (counts->c_cpu >= ncpu) + return ESRCH; + ci = cpu_lookup(counts->c_cpu); + if (ci == NULL) + return ESRCH; + + xc = xc_unicast(0, tprof_getcounts_cpu, tb, counts->c_count, ci); + xc_wait(xc); + + counts->c_ncounters = tb->tb_softc.sc_ncounters; + counts->c_runningmask = tb->tb_softc.sc_ctr_running_mask; + return 0; } /* @@ -457,7 +719,8 @@ tprof_sample(void *unused, const tprof_f sp->s_pid = l->l_proc->p_pid; sp->s_lwpid = l->l_lid; sp->s_cpuid = c->c_cpuid; - sp->s_flags = (tfi->tfi_inkernel) ? TPROF_SAMPLE_INKERNEL : 0; + sp->s_flags = ((tfi->tfi_inkernel) ? 
TPROF_SAMPLE_INKERNEL : 0) | + __SHIFTIN(tfi->tfi_counter, TPROF_SAMPLE_COUNTER_MASK); sp->s_pc = pc; buf->b_used = idx + 1; } @@ -488,10 +751,9 @@ tprof_backend_register(const char *name, return ENOTSUP; } #endif - tb = kmem_alloc(sizeof(*tb), KM_SLEEP); + tb = kmem_zalloc(sizeof(*tb), KM_SLEEP); tb->tb_name = name; tb->tb_ops = ops; - tb->tb_usecount = 0; LIST_INSERT_HEAD(&tprof_backends, tb, tb_list); #if 1 /* XXX for now */ if (tprof_backend == NULL) { @@ -500,6 +762,13 @@ tprof_backend_register(const char *name, #endif mutex_exit(&tprof_startstop_lock); + /* init backend softc */ + tb->tb_softc.sc_ncounters = tb->tb_ops->tbo_ncounters(); + tb->tb_softc.sc_ctr_offset_percpu_size = + sizeof(uint64_t) * tb->tb_softc.sc_ncounters; + tb->tb_softc.sc_ctr_offset_percpu = + percpu_alloc(tb->tb_softc.sc_ctr_offset_percpu_size); + return 0; } @@ -520,7 +789,7 @@ tprof_backend_unregister(const char *nam panic("%s: not found '%s'", __func__, name); } #endif /* defined(DIAGNOSTIC) */ - if (tb->tb_usecount > 0) { + if (tb->tb_softc.sc_ctr_running_mask != 0) { mutex_exit(&tprof_startstop_lock); return EBUSY; } @@ -532,6 +801,11 @@ tprof_backend_unregister(const char *nam LIST_REMOVE(tb, tb_list); mutex_exit(&tprof_startstop_lock); + /* fini backend softc */ + percpu_free(tb->tb_softc.sc_ctr_offset_percpu, + tb->tb_softc.sc_ctr_offset_percpu_size); + + /* free backend */ kmem_free(tb, sizeof(*tb)); return 0; @@ -567,8 +841,17 @@ tprof_close(dev_t dev, int flags, int ty mutex_enter(&tprof_lock); tprof_owner = NULL; mutex_exit(&tprof_lock); - tprof_stop(); + tprof_stop(TPROF_COUNTERMASK_ALL); tprof_clear(); + + tprof_backend_t *tb = tprof_backend; + if (tb != NULL) { + KASSERT(tb->tb_softc.sc_ctr_running_mask == 0); + tb->tb_softc.sc_ctr_configured_mask = 0; + tb->tb_softc.sc_ctr_prof_mask = 0; + tb->tb_softc.sc_ctr_ovf_mask = 0; + } + mutex_exit(&tprof_startstop_lock); return 0; @@ -644,6 +927,7 @@ static int tprof_ioctl(dev_t dev, u_long cmd, void *data, int flags, struct lwp 
*l) { const tprof_param_t *param; + tprof_counts_t *counts; int error = 0; KASSERT(minor(dev) == 0); @@ -654,15 +938,19 @@ tprof_ioctl(dev_t dev, u_long cmd, void tprof_getinfo(data); mutex_exit(&tprof_startstop_lock); break; + case TPROF_IOC_GETNCOUNTERS: + mutex_enter(&tprof_lock); + error = tprof_getncounters((u_int *)data); + mutex_exit(&tprof_lock); + break; case TPROF_IOC_START: - param = data; mutex_enter(&tprof_startstop_lock); - error = tprof_start(param); + error = tprof_start(*(tprof_countermask_t *)data); mutex_exit(&tprof_startstop_lock); break; case TPROF_IOC_STOP: mutex_enter(&tprof_startstop_lock); - tprof_stop(); + tprof_stop(*(tprof_countermask_t *)data); mutex_exit(&tprof_startstop_lock); break; case TPROF_IOC_GETSTAT: @@ -670,6 +958,18 @@ tprof_ioctl(dev_t dev, u_long cmd, void memcpy(data, &tprof_stat, sizeof(tprof_stat)); mutex_exit(&tprof_lock); break; + case TPROF_IOC_CONFIGURE_EVENT: + param = data; + mutex_enter(&tprof_startstop_lock); + error = tprof_configure_event(param); + mutex_exit(&tprof_startstop_lock); + break; + case TPROF_IOC_GETCOUNTS: + counts = data; + mutex_enter(&tprof_startstop_lock); + error = tprof_getcounts(counts); + mutex_exit(&tprof_startstop_lock); + break; default: error = EINVAL; break; Index: src/sys/dev/tprof/tprof.h diff -u src/sys/dev/tprof/tprof.h:1.6 src/sys/dev/tprof/tprof.h:1.7 --- src/sys/dev/tprof/tprof.h:1.6 Fri Jul 13 07:56:29 2018 +++ src/sys/dev/tprof/tprof.h Thu Dec 1 00:32:52 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: tprof.h,v 1.6 2018/07/13 07:56:29 maxv Exp $ */ +/* $NetBSD: tprof.h,v 1.7 2022/12/01 00:32:52 ryo Exp $ */ /*- * Copyright (c)2008,2009,2010 YAMAMOTO Takashi, @@ -37,19 +37,45 @@ #include <dev/tprof/tprof_types.h> +struct tprof_backend_softc_counter { + tprof_param_t ctr_param; + u_int ctr_bitwidth; + uint64_t ctr_counter_val; + uint64_t ctr_counter_reset_val; +}; + +typedef struct tprof_backend_softc { + u_int sc_ncounters; + tprof_countermask_t sc_ctr_running_mask;/* start/stop */ + 
tprof_countermask_t sc_ctr_configured_mask; /* configured */ + tprof_countermask_t sc_ctr_ovf_mask; /* overflow intr required */ + tprof_countermask_t sc_ctr_prof_mask; /* profiled */ + percpu_t *sc_ctr_offset_percpu; + size_t sc_ctr_offset_percpu_size; + struct tprof_backend_softc_counter sc_count[TPROF_MAXCOUNTERS]; +} tprof_backend_softc_t; + typedef struct tprof_backend_ops { - uint64_t (*tbo_estimate_freq)(void); /* samples per second */ uint32_t (*tbo_ident)(void); - int (*tbo_start)(const tprof_param_t *); - void (*tbo_stop)(const tprof_param_t *); + u_int (*tbo_ncounters)(void); + u_int (*tbo_counter_bitwidth)(u_int); + uint64_t (*tbo_counter_read)(u_int); + uint64_t (*tbo_counter_estimate_freq)(u_int); + int (*tbo_valid_event)(u_int, const tprof_param_t *); + void (*tbo_configure_event)(u_int, const tprof_param_t *); + void (*tbo_start)(tprof_countermask_t); + void (*tbo_stop)(tprof_countermask_t); + int (*tbo_establish)(tprof_backend_softc_t *); + void (*tbo_disestablish)(tprof_backend_softc_t *); } tprof_backend_ops_t; -#define TPROF_BACKEND_VERSION 3 +#define TPROF_BACKEND_VERSION 4 int tprof_backend_register(const char *, const tprof_backend_ops_t *, int); int tprof_backend_unregister(const char *); typedef struct { uintptr_t tfi_pc; /* program counter */ + u_int tfi_counter; /* counter. 
0..(TPROF_MAXCOUNTERS-1) */ bool tfi_inkernel; /* if tfi_pc is in the kernel address space */ } tprof_frame_info_t; Index: src/sys/dev/tprof/tprof_armv7.c diff -u src/sys/dev/tprof/tprof_armv7.c:1.9 src/sys/dev/tprof/tprof_armv7.c:1.10 --- src/sys/dev/tprof/tprof_armv7.c:1.9 Thu Dec 1 00:29:51 2022 +++ src/sys/dev/tprof/tprof_armv7.c Thu Dec 1 00:32:52 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: tprof_armv7.c,v 1.9 2022/12/01 00:29:51 ryo Exp $ */ +/* $NetBSD: tprof_armv7.c,v 1.10 2022/12/01 00:32:52 ryo Exp $ */ /*- * Copyright (c) 2018 Jared McNeill <jmcne...@invisible.ca> @@ -27,11 +27,12 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: tprof_armv7.c,v 1.9 2022/12/01 00:29:51 ryo Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tprof_armv7.c,v 1.10 2022/12/01 00:32:52 ryo Exp $"); #include <sys/param.h> #include <sys/bus.h> #include <sys/cpu.h> +#include <sys/percpu.h> #include <sys/xcall.h> #include <dev/tprof/tprof.h> @@ -50,15 +51,13 @@ __KERNEL_RCSID(0, "$NetBSD: tprof_armv7. #define PMCNTEN_C __BIT(31) #define PMCNTEN_P __BITS(30,0) +#define PMOVS_C __BIT(31) +#define PMOVS_P __BITS(30,0) + #define PMEVTYPER_P __BIT(31) #define PMEVTYPER_U __BIT(30) #define PMEVTYPER_EVTCOUNT __BITS(7,0) -static tprof_param_t armv7_pmu_param; -static const u_int armv7_pmu_counter = 1; -static uint32_t counter_val; -static uint32_t counter_reset_val; - static uint16_t cortexa9_events[] = { 0x40, 0x41, 0x42, 0x50, 0x51, @@ -118,7 +117,7 @@ armv7_pmu_set_pmevtyper(u_int counter, u armreg_pmxevtyper_write(val); } -static void +static inline void armv7_pmu_set_pmevcntr(u_int counter, uint32_t val) { armreg_pmselr_write(counter); @@ -126,138 +125,175 @@ armv7_pmu_set_pmevcntr(u_int counter, ui armreg_pmxevcntr_write(val); } -static void -armv7_pmu_start_cpu(void *arg1, void *arg2) +static inline uint64_t +armv7_pmu_get_pmevcntr(u_int counter) { - const uint32_t counter_mask = __BIT(armv7_pmu_counter); - uint64_t pmcr, pmevtyper; - - /* Enable performance monitor */ - pmcr = 
armreg_pmcr_read(); - pmcr |= PMCR_E; - armreg_pmcr_write(pmcr); - - /* Disable event counter */ - armreg_pmcntenclr_write(counter_mask); - - /* Configure event counter */ - pmevtyper = __SHIFTIN(armv7_pmu_param.p_event, PMEVTYPER_EVTCOUNT); - if (!ISSET(armv7_pmu_param.p_flags, TPROF_PARAM_USER)) - pmevtyper |= PMEVTYPER_U; - if (!ISSET(armv7_pmu_param.p_flags, TPROF_PARAM_KERN)) - pmevtyper |= PMEVTYPER_P; - - armv7_pmu_set_pmevtyper(armv7_pmu_counter, pmevtyper); - - /* Enable overflow interrupts */ - armreg_pmintenset_write(counter_mask); - - /* Clear overflow flag */ - armreg_pmovsr_write(counter_mask); + armreg_pmselr_write(counter); + isb(); + return armreg_pmxevcntr_read(); +} - /* Initialize event counter value */ - armv7_pmu_set_pmevcntr(armv7_pmu_counter, counter_reset_val); +/* read and write at once */ +static inline uint64_t +armv7_pmu_getset_pmevcntr(u_int counter, uint64_t val) +{ + uint64_t c; - /* Enable event counter */ - armreg_pmcntenset_write(counter_mask); + armreg_pmselr_write(counter); + isb(); + c = armreg_pmxevcntr_read(); + armreg_pmxevcntr_write(val); + return c; } -static void -armv7_pmu_stop_cpu(void *arg1, void *arg2) +static uint32_t +armv7_pmu_ncounters(void) { - const uint32_t counter_mask = __BIT(armv7_pmu_counter); - - /* Disable overflow interrupts */ - armreg_pmintenclr_write(counter_mask); + return __SHIFTOUT(armreg_pmcr_read(), PMCR_N); +} - /* Disable event counter */ - armreg_pmcntenclr_write(counter_mask); +static u_int +armv7_pmu_counter_bitwidth(u_int counter) +{ + return 32; } static uint64_t -armv7_pmu_estimate_freq(void) +armv7_pmu_counter_estimate_freq(u_int counter) { uint64_t cpufreq = curcpu()->ci_data.cpu_cc_freq; - uint64_t freq = 10000; - uint32_t pmcr; - - counter_val = cpufreq / freq; - if (counter_val == 0) - counter_val = 4000000000ULL / freq; - - pmcr = armreg_pmcr_read(); - if (pmcr & PMCR_D) - counter_val /= 64; - return freq; -} - -static uint32_t -armv7_pmu_ident(void) -{ - return 
TPROF_IDENT_ARMV7_GENERIC; + if (ISSET(armreg_pmcr_read(), PMCR_D)) + cpufreq /= 64; + return cpufreq; } static int -armv7_pmu_start(const tprof_param_t *param) +armv7_pmu_valid_event(u_int counter, const tprof_param_t *param) { - /* PMCR.N of 0 means that no event counters are available */ - if (__SHIFTOUT(armreg_pmcr_read(), PMCR_N) == 0) { - return EINVAL; - } - if (!armv7_pmu_event_implemented(param->p_event)) { - printf("%s: event %#llx not implemented on this CPU\n", + printf("%s: event %#" PRIx64 " not implemented on this CPU\n", __func__, param->p_event); return EINVAL; } + return 0; +} - counter_reset_val = -counter_val + 1; +static void +armv7_pmu_configure_event(u_int counter, const tprof_param_t *param) +{ + /* Disable event counter */ + armreg_pmcntenclr_write(__BIT(counter) & PMCNTEN_P); - armv7_pmu_param = *param; - uint64_t xc = xc_broadcast(0, armv7_pmu_start_cpu, NULL, NULL); - xc_wait(xc); + /* Disable overflow interrupts */ + armreg_pmintenclr_write(__BIT(counter) & PMINTEN_P); - return 0; + /* Configure event counter */ + uint32_t pmevtyper = __SHIFTIN(param->p_event, PMEVTYPER_EVTCOUNT); + if (!ISSET(param->p_flags, TPROF_PARAM_USER)) + pmevtyper |= PMEVTYPER_U; + if (!ISSET(param->p_flags, TPROF_PARAM_KERN)) + pmevtyper |= PMEVTYPER_P; + armv7_pmu_set_pmevtyper(counter, pmevtyper); + + /* + * Enable overflow interrupts. + * Whether profiled or not, the counter width of armv7 is 32 bits, + * so overflow handling is required anyway. 
+ */ + armreg_pmintenset_write(__BIT(counter) & PMINTEN_P); + + /* Clear overflow flag */ + armreg_pmovsr_write(__BIT(counter) & PMOVS_P); + + /* reset the counter */ + armv7_pmu_set_pmevcntr(counter, param->p_value); } static void -armv7_pmu_stop(const tprof_param_t *param) +armv7_pmu_start(tprof_countermask_t runmask) { - uint64_t xc; + /* Enable event counters */ + armreg_pmcntenset_write(runmask & PMCNTEN_P); - xc = xc_broadcast(0, armv7_pmu_stop_cpu, NULL, NULL); - xc_wait(xc); + /* + * PMCR.E is shared with PMCCNTR and event counters. + * It is set here in case PMCCNTR is not used in the system. + */ + armreg_pmcr_write(armreg_pmcr_read() | PMCR_E); } -static const tprof_backend_ops_t tprof_armv7_pmu_ops = { - .tbo_estimate_freq = armv7_pmu_estimate_freq, - .tbo_ident = armv7_pmu_ident, - .tbo_start = armv7_pmu_start, - .tbo_stop = armv7_pmu_stop, -}; +static void +armv7_pmu_stop(tprof_countermask_t stopmask) +{ + /* Disable event counter */ + armreg_pmcntenclr_write(stopmask & PMCNTEN_P); +} + +/* XXX: argument of armv8_pmu_intr() */ +extern struct tprof_backend *tprof_backend; +static void *pmu_intr_arg; int armv7_pmu_intr(void *priv) { const struct trapframe * const tf = priv; - const uint32_t counter_mask = __BIT(armv7_pmu_counter); + tprof_backend_softc_t *sc = pmu_intr_arg; tprof_frame_info_t tfi; + int bit; + const uint32_t pmovs = armreg_pmovsr_read() & PMOVS_P; - const uint32_t pmovsr = armreg_pmovsr_read(); - if ((pmovsr & counter_mask) != 0) { - tfi.tfi_pc = tf->tf_pc; - tfi.tfi_inkernel = tfi.tfi_pc >= VM_MIN_KERNEL_ADDRESS && - tfi.tfi_pc < VM_MAX_KERNEL_ADDRESS; - tprof_sample(NULL, &tfi); - - armv7_pmu_set_pmevcntr(armv7_pmu_counter, counter_reset_val); + uint64_t *counters_offset = + percpu_getptr_remote(sc->sc_ctr_offset_percpu, curcpu()); + uint32_t mask = pmovs; + while ((bit = ffs(mask)) != 0) { + bit--; + CLR(mask, __BIT(bit)); + + if (ISSET(sc->sc_ctr_prof_mask, __BIT(bit))) { + /* account for the counter, and reset */ + uint64_t ctr = 
armv7_pmu_getset_pmevcntr(bit, + sc->sc_count[bit].ctr_counter_reset_val); + counters_offset[bit] += + sc->sc_count[bit].ctr_counter_val + ctr; + + /* record a sample */ + tfi.tfi_pc = tf->tf_pc; + tfi.tfi_counter = bit; + tfi.tfi_inkernel = + tfi.tfi_pc >= VM_MIN_KERNEL_ADDRESS && + tfi.tfi_pc < VM_MAX_KERNEL_ADDRESS; + tprof_sample(NULL, &tfi); + } else { + /* counter has overflowed */ + counters_offset[bit] += __BIT(32); + } } - armreg_pmovsr_write(pmovsr); + armreg_pmovsr_write(pmovs); return 1; } +static uint32_t +armv7_pmu_ident(void) +{ + return TPROF_IDENT_ARMV7_GENERIC; +} + +static const tprof_backend_ops_t tprof_armv7_pmu_ops = { + .tbo_ident = armv7_pmu_ident, + .tbo_ncounters = armv7_pmu_ncounters, + .tbo_counter_bitwidth = armv7_pmu_counter_bitwidth, + .tbo_counter_read = armv7_pmu_get_pmevcntr, + .tbo_counter_estimate_freq = armv7_pmu_counter_estimate_freq, + .tbo_valid_event = armv7_pmu_valid_event, + .tbo_configure_event = armv7_pmu_configure_event, + .tbo_start = armv7_pmu_start, + .tbo_stop = armv7_pmu_stop, + .tbo_establish = NULL, + .tbo_disestablish = NULL, +}; + static void armv7_pmu_init_cpu(void *arg1, void *arg2) { @@ -274,9 +310,21 @@ armv7_pmu_init_cpu(void *arg1, void *arg int armv7_pmu_init(void) { + int error, ncounters; + + ncounters = armv7_pmu_ncounters(); + if (ncounters == 0) + return ENOTSUP; + uint64_t xc = xc_broadcast(0, armv7_pmu_init_cpu, NULL, NULL); xc_wait(xc); - return tprof_backend_register("tprof_armv7", &tprof_armv7_pmu_ops, + error = tprof_backend_register("tprof_armv7", &tprof_armv7_pmu_ops, TPROF_BACKEND_VERSION); + if (error == 0) { + /* XXX: for argument of armv7_pmu_intr() */ + pmu_intr_arg = tprof_backend; + } + + return error; } Index: src/sys/dev/tprof/tprof_armv8.c diff -u src/sys/dev/tprof/tprof_armv8.c:1.17 src/sys/dev/tprof/tprof_armv8.c:1.18 --- src/sys/dev/tprof/tprof_armv8.c:1.17 Thu Dec 1 00:29:10 2022 +++ src/sys/dev/tprof/tprof_armv8.c Thu Dec 1 00:32:52 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: 
tprof_armv8.c,v 1.17 2022/12/01 00:29:10 ryo Exp $ */ +/* $NetBSD: tprof_armv8.c,v 1.18 2022/12/01 00:32:52 ryo Exp $ */ /*- * Copyright (c) 2018 Jared McNeill <jmcne...@invisible.ca> @@ -27,11 +27,12 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: tprof_armv8.c,v 1.17 2022/12/01 00:29:10 ryo Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tprof_armv8.c,v 1.18 2022/12/01 00:32:52 ryo Exp $"); #include <sys/param.h> #include <sys/bus.h> #include <sys/cpu.h> +#include <sys/percpu.h> #include <sys/xcall.h> #include <dev/tprof/tprof.h> @@ -41,10 +42,12 @@ __KERNEL_RCSID(0, "$NetBSD: tprof_armv8. #include <dev/tprof/tprof_armv8.h> -static tprof_param_t armv8_pmu_param; -static const u_int armv8_pmu_counter = 0; -static uint32_t counter_val; -static uint32_t counter_reset_val; +static u_int counter_bitwidth; + +/* + * armv8 can handle up to 31 event counters, + * PMCR_EL0.N counters are actually available. + */ static bool armv8_pmu_event_implemented(uint16_t event) @@ -75,137 +78,178 @@ armv8_pmu_set_pmevtyper(u_int counter, u reg_pmxevtyper_el0_write(val); } -static void -armv8_pmu_set_pmevcntr(u_int counter, uint32_t val) +static inline void +armv8_pmu_set_pmevcntr(u_int counter, uint64_t val) { reg_pmselr_el0_write(counter); isb(); reg_pmxevcntr_el0_write(val); } -static void -armv8_pmu_start_cpu(void *arg1, void *arg2) +static inline uint64_t +armv8_pmu_get_pmevcntr(u_int counter) { - const uint32_t counter_mask = __BIT(armv8_pmu_counter); - uint64_t pmevtyper; - - /* Disable event counter */ - reg_pmcntenclr_el0_write(counter_mask); - - /* Configure event counter */ - pmevtyper = __SHIFTIN(armv8_pmu_param.p_event, PMEVTYPER_EVTCOUNT); - if (!ISSET(armv8_pmu_param.p_flags, TPROF_PARAM_USER)) - pmevtyper |= PMEVTYPER_U; - if (!ISSET(armv8_pmu_param.p_flags, TPROF_PARAM_KERN)) - pmevtyper |= PMEVTYPER_P; - - armv8_pmu_set_pmevtyper(armv8_pmu_counter, pmevtyper); - - /* Enable overflow interrupts */ - reg_pmintenset_el1_write(counter_mask); - - /* Clear overflow flag 
*/ - reg_pmovsclr_el0_write(counter_mask); - - /* Initialize event counter value */ - armv8_pmu_set_pmevcntr(armv8_pmu_counter, counter_reset_val); - - /* Enable event counter */ - reg_pmcntenset_el0_write(counter_mask); - reg_pmcr_el0_write(reg_pmcr_el0_read() | PMCR_E); + reg_pmselr_el0_write(counter); + isb(); + return reg_pmxevcntr_el0_read(); } -static void -armv8_pmu_stop_cpu(void *arg1, void *arg2) +/* read and write at once */ +static inline uint64_t +armv8_pmu_getset_pmevcntr(u_int counter, uint64_t val) { - const uint32_t counter_mask = __BIT(armv8_pmu_counter); - - /* Disable overflow interrupts */ - reg_pmintenclr_el1_write(counter_mask); + uint64_t c; - /* Disable event counter */ - reg_pmcntenclr_el0_write(counter_mask); + reg_pmselr_el0_write(counter); + isb(); + c = reg_pmxevcntr_el0_read(); + reg_pmxevcntr_el0_write(val); + return c; } -static uint64_t -armv8_pmu_estimate_freq(void) +static uint32_t +armv8_pmu_ncounters(void) { - uint64_t cpufreq = curcpu()->ci_data.cpu_cc_freq; - uint64_t freq = 10000; - - counter_val = cpufreq / freq; - if (counter_val == 0) - counter_val = 4000000000ULL / freq; + return __SHIFTOUT(reg_pmcr_el0_read(), PMCR_N); +} - return freq; +static u_int +armv8_pmu_counter_bitwidth(u_int counter) +{ + return counter_bitwidth; } -static uint32_t -armv8_pmu_ident(void) +static uint64_t +armv8_pmu_counter_estimate_freq(u_int counter) { - return TPROF_IDENT_ARMV8_GENERIC; + return curcpu()->ci_data.cpu_cc_freq; } static int -armv8_pmu_start(const tprof_param_t *param) +armv8_pmu_valid_event(u_int counter, const tprof_param_t *param) { - /* PMCR.N of 0 means that no event counters are available */ - if (__SHIFTOUT(reg_pmcr_el0_read(), PMCR_N) == 0) { - return EINVAL; - } - if (!armv8_pmu_event_implemented(param->p_event)) { printf("%s: event %#" PRIx64 " not implemented on this CPU\n", __func__, param->p_event); return EINVAL; } + return 0; +} + +static void +armv8_pmu_configure_event(u_int counter, const tprof_param_t *param) +{ 
+ /* Disable event counter */ + reg_pmcntenclr_el0_write(__BIT(counter) & PMCNTEN_P); - counter_reset_val = -counter_val + 1; + /* Disable overflow interrupts */ + reg_pmintenclr_el1_write(__BIT(counter) & PMINTEN_P); - armv8_pmu_param = *param; - uint64_t xc = xc_broadcast(0, armv8_pmu_start_cpu, NULL, NULL); - xc_wait(xc); + /* Configure event counter */ + uint64_t pmevtyper = __SHIFTIN(param->p_event, PMEVTYPER_EVTCOUNT); + if (!ISSET(param->p_flags, TPROF_PARAM_USER)) + pmevtyper |= PMEVTYPER_U; + if (!ISSET(param->p_flags, TPROF_PARAM_KERN)) + pmevtyper |= PMEVTYPER_P; + armv8_pmu_set_pmevtyper(counter, pmevtyper); - return 0; + if (ISSET(param->p_flags, TPROF_PARAM_PROFILE) || + counter_bitwidth != 64) { + /* Enable overflow interrupts */ + reg_pmintenset_el1_write(__BIT(counter) & PMINTEN_P); + } + + /* Clear overflow flag */ + reg_pmovsclr_el0_write(__BIT(counter) & PMOVS_P); + + /* reset the counter */ + armv8_pmu_set_pmevcntr(counter, param->p_value); } static void -armv8_pmu_stop(const tprof_param_t *param) +armv8_pmu_start(tprof_countermask_t runmask) { - uint64_t xc; + /* Enable event counters */ + reg_pmcntenset_el0_write(runmask & PMCNTEN_P); - xc = xc_broadcast(0, armv8_pmu_stop_cpu, NULL, NULL); - xc_wait(xc); + /* + * PMCR.E is shared with PMCCNTR_EL0 and event counters. + * It is set here in case PMCCNTR_EL0 is not used in the system. 
+ */ + reg_pmcr_el0_write(reg_pmcr_el0_read() | PMCR_E); } -static const tprof_backend_ops_t tprof_armv8_pmu_ops = { - .tbo_estimate_freq = armv8_pmu_estimate_freq, - .tbo_ident = armv8_pmu_ident, - .tbo_start = armv8_pmu_start, - .tbo_stop = armv8_pmu_stop, -}; +static void +armv8_pmu_stop(tprof_countermask_t stopmask) +{ + /* Disable event counter */ + reg_pmcntenclr_el0_write(stopmask & PMCNTEN_P); +} + +/* XXX: argument of armv8_pmu_intr() */ +extern struct tprof_backend *tprof_backend; +static void *pmu_intr_arg; int armv8_pmu_intr(void *priv) { const struct trapframe * const tf = priv; - const uint32_t counter_mask = __BIT(armv8_pmu_counter); + tprof_backend_softc_t *sc = pmu_intr_arg; tprof_frame_info_t tfi; + int bit; + const uint32_t pmovs = reg_pmovsset_el0_read() & PMOVS_P; - const uint32_t pmovs = reg_pmovsset_el0_read(); - if ((pmovs & counter_mask) != 0) { - tfi.tfi_pc = tf->tf_pc; - tfi.tfi_inkernel = tfi.tfi_pc >= VM_MIN_KERNEL_ADDRESS && - tfi.tfi_pc < VM_MAX_KERNEL_ADDRESS; - tprof_sample(NULL, &tfi); - - armv8_pmu_set_pmevcntr(armv8_pmu_counter, counter_reset_val); + uint64_t *counters_offset = + percpu_getptr_remote(sc->sc_ctr_offset_percpu, curcpu()); + uint32_t mask = pmovs; + while ((bit = ffs(mask)) != 0) { + bit--; + CLR(mask, __BIT(bit)); + + if (ISSET(sc->sc_ctr_prof_mask, __BIT(bit))) { + /* account for the counter, and reset */ + uint64_t ctr = armv8_pmu_getset_pmevcntr(bit, + sc->sc_count[bit].ctr_counter_reset_val); + counters_offset[bit] += + sc->sc_count[bit].ctr_counter_val + ctr; + + /* record a sample */ + tfi.tfi_pc = tf->tf_pc; + tfi.tfi_counter = bit; + tfi.tfi_inkernel = + tfi.tfi_pc >= VM_MIN_KERNEL_ADDRESS && + tfi.tfi_pc < VM_MAX_KERNEL_ADDRESS; + tprof_sample(NULL, &tfi); + } else { + /* counter has overflowed */ + counters_offset[bit] += __BIT(32); + } } reg_pmovsclr_el0_write(pmovs); return 1; } +static uint32_t +armv8_pmu_ident(void) +{ + return TPROF_IDENT_ARMV8_GENERIC; +} + +static const tprof_backend_ops_t 
tprof_armv8_pmu_ops = { + .tbo_ident = armv8_pmu_ident, + .tbo_ncounters = armv8_pmu_ncounters, + .tbo_counter_bitwidth = armv8_pmu_counter_bitwidth, + .tbo_counter_read = armv8_pmu_get_pmevcntr, + .tbo_counter_estimate_freq = armv8_pmu_counter_estimate_freq, + .tbo_valid_event = armv8_pmu_valid_event, + .tbo_configure_event = armv8_pmu_configure_event, + .tbo_start = armv8_pmu_start, + .tbo_stop = armv8_pmu_stop, + .tbo_establish = NULL, + .tbo_disestablish = NULL, +}; + static void armv8_pmu_init_cpu(void *arg1, void *arg2) { @@ -232,11 +276,32 @@ armv8_pmu_detect(void) int armv8_pmu_init(void) { + int error, ncounters; + KASSERT(armv8_pmu_detect()); + ncounters = armv8_pmu_ncounters(); + if (ncounters == 0) + return ENOTSUP; + + /* Is 64bit event counter available? */ + const uint64_t dfr0 = reg_id_aa64dfr0_el1_read(); + const u_int pmuver = __SHIFTOUT(dfr0, ID_AA64DFR0_EL1_PMUVER); + if (pmuver >= ID_AA64DFR0_EL1_PMUVER_V3P5 && + ISSET(reg_pmcr_el0_read(), PMCR_LP)) + counter_bitwidth = 64; + else + counter_bitwidth = 32; + uint64_t xc = xc_broadcast(0, armv8_pmu_init_cpu, NULL, NULL); xc_wait(xc); - return tprof_backend_register("tprof_armv8", &tprof_armv8_pmu_ops, + error = tprof_backend_register("tprof_armv8", &tprof_armv8_pmu_ops, TPROF_BACKEND_VERSION); + if (error == 0) { + /* XXX: for argument of armv8_pmu_intr() */ + pmu_intr_arg = tprof_backend; + } + + return error; } Index: src/sys/dev/tprof/tprof_ioctl.h diff -u src/sys/dev/tprof/tprof_ioctl.h:1.4 src/sys/dev/tprof/tprof_ioctl.h:1.5 --- src/sys/dev/tprof/tprof_ioctl.h:1.4 Fri Jul 13 07:56:29 2018 +++ src/sys/dev/tprof/tprof_ioctl.h Thu Dec 1 00:32:52 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: tprof_ioctl.h,v 1.4 2018/07/13 07:56:29 maxv Exp $ */ +/* $NetBSD: tprof_ioctl.h,v 1.5 2022/12/01 00:32:52 ryo Exp $ */ /*- * Copyright (c)2008,2010 YAMAMOTO Takashi, @@ -37,17 +37,12 @@ #include <dev/tprof/tprof_types.h> -#define TPROF_VERSION 4 /* kernel-userland ABI version */ +#define TPROF_VERSION 5 /* 
kernel-userland ABI version */ struct tprof_info { uint32_t ti_version; uint32_t ti_ident; }; -#define TPROF_IOC_GETINFO _IOR('T', 1, struct tprof_info) - -#define TPROF_IOC_START _IOW('T', 2, tprof_param_t) - -#define TPROF_IOC_STOP _IO('T', 3) struct tprof_stat { uint64_t ts_sample; /* samples successfully recorded */ @@ -57,6 +52,13 @@ struct tprof_stat { uint64_t ts_dropbuf; /* buffers dropped due to the global limit */ uint64_t ts_dropbuf_sample; /* samples dropped with ts_dropbuf */ }; -#define TPROF_IOC_GETSTAT _IOR('T', 4, struct tprof_stat) + +#define TPROF_IOC_GETINFO _IOR('T', 1, struct tprof_info) +#define TPROF_IOC_START _IOW('T', 2, tprof_countermask_t) +#define TPROF_IOC_STOP _IOW('T', 3, tprof_countermask_t) +#define TPROF_IOC_GETSTAT _IOR('T', 4, struct tprof_stat) +#define TPROF_IOC_GETNCOUNTERS _IOR('T', 5, u_int) +#define TPROF_IOC_CONFIGURE_EVENT _IOW('T', 6, tprof_param_t) +#define TPROF_IOC_GETCOUNTS _IOWR('T', 7, tprof_counts_t) #endif /* _DEV_TPROF_TPROF_IOCTL_H_ */ Index: src/sys/dev/tprof/tprof_x86_intel.c diff -u src/sys/dev/tprof/tprof_x86_intel.c:1.4 src/sys/dev/tprof/tprof_x86_intel.c:1.5 --- src/sys/dev/tprof/tprof_x86_intel.c:1.4 Thu May 26 13:02:04 2022 +++ src/sys/dev/tprof/tprof_x86_intel.c Thu Dec 1 00:32:52 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: tprof_x86_intel.c,v 1.4 2022/05/26 13:02:04 msaitoh Exp $ */ +/* $NetBSD: tprof_x86_intel.c,v 1.5 2022/12/01 00:32:52 ryo Exp $ */ /* * Copyright (c) 2018 The NetBSD Foundation, Inc. 
@@ -56,15 +56,15 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: tprof_x86_intel.c,v 1.4 2022/05/26 13:02:04 msaitoh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tprof_x86_intel.c,v 1.5 2022/12/01 00:32:52 ryo Exp $"); #include <sys/param.h> #include <sys/systm.h> -#include <sys/device.h> #include <sys/kernel.h> #include <sys/module.h> #include <sys/cpu.h> +#include <sys/percpu.h> #include <sys/xcall.h> #include <dev/tprof/tprof.h> @@ -79,6 +79,12 @@ __KERNEL_RCSID(0, "$NetBSD: tprof_x86_in #include <machine/i82489reg.h> #include <machine/i82489var.h> +#define NCTRS 4 /* XXX */ +static u_int counter_bitwidth; + +#define PERFEVTSEL(i) (MSR_EVNTSEL0 + (i)) +#define PERFCTR(i) (MSR_PERFCTR0 + (i)) + #define PERFEVTSEL_EVENT_SELECT __BITS(0, 7) #define PERFEVTSEL_UNIT_MASK __BITS(8, 15) #define PERFEVTSEL_USR __BIT(16) @@ -90,72 +96,115 @@ __KERNEL_RCSID(0, "$NetBSD: tprof_x86_in #define PERFEVTSEL_INV __BIT(23) #define PERFEVTSEL_COUNTER_MASK __BITS(24, 31) -static uint64_t counter_bitwidth; -static uint64_t counter_val = 5000000; -static uint64_t counter_reset_val; - static uint32_t intel_lapic_saved[MAXCPUS]; static nmi_handler_t *intel_nmi_handle; -static tprof_param_t intel_param; + +static uint32_t +tprof_intel_ncounters(void) +{ + return NCTRS; +} + +static u_int +tprof_intel_counter_bitwidth(u_int counter) +{ + return counter_bitwidth; +} + +static inline void +tprof_intel_counter_write(u_int counter, uint64_t val) +{ + wrmsr(PERFCTR(counter), val); +} + +static inline uint64_t +tprof_intel_counter_read(u_int counter) +{ + return rdmsr(PERFCTR(counter)); +} static void -tprof_intel_start_cpu(void *arg1, void *arg2) +tprof_intel_configure_event(u_int counter, const tprof_param_t *param) { - struct cpu_info * const ci = curcpu(); uint64_t evtval; evtval = - __SHIFTIN(intel_param.p_event, PERFEVTSEL_EVENT_SELECT) | - __SHIFTIN(intel_param.p_unit, PERFEVTSEL_UNIT_MASK) | - ((intel_param.p_flags & TPROF_PARAM_USER) ? 
PERFEVTSEL_USR : 0) | - ((intel_param.p_flags & TPROF_PARAM_KERN) ? PERFEVTSEL_OS : 0) | - PERFEVTSEL_INT | - PERFEVTSEL_EN; + __SHIFTIN(param->p_event, PERFEVTSEL_EVENT_SELECT) | + __SHIFTIN(param->p_unit, PERFEVTSEL_UNIT_MASK) | + ((param->p_flags & TPROF_PARAM_USER) ? PERFEVTSEL_USR : 0) | + ((param->p_flags & TPROF_PARAM_KERN) ? PERFEVTSEL_OS : 0) | + PERFEVTSEL_INT; + wrmsr(PERFEVTSEL(counter), evtval); - wrmsr(MSR_PERFCTR0, counter_reset_val); - wrmsr(MSR_EVNTSEL0, evtval); - - intel_lapic_saved[cpu_index(ci)] = lapic_readreg(LAPIC_LVT_PCINT); - lapic_writereg(LAPIC_LVT_PCINT, LAPIC_DLMODE_NMI); + /* reset the counter */ + tprof_intel_counter_write(counter, param->p_value); } static void -tprof_intel_stop_cpu(void *arg1, void *arg2) +tprof_intel_start(tprof_countermask_t runmask) { - struct cpu_info * const ci = curcpu(); + int bit; - wrmsr(MSR_EVNTSEL0, 0); - wrmsr(MSR_PERFCTR0, 0); + while ((bit = ffs(runmask)) != 0) { + bit--; + CLR(runmask, __BIT(bit)); + wrmsr(PERFEVTSEL(bit), rdmsr(PERFEVTSEL(bit)) | PERFEVTSEL_EN); + } +} - lapic_writereg(LAPIC_LVT_PCINT, intel_lapic_saved[cpu_index(ci)]); +static void +tprof_intel_stop(tprof_countermask_t stopmask) +{ + int bit; + + while ((bit = ffs(stopmask)) != 0) { + bit--; + CLR(stopmask, __BIT(bit)); + wrmsr(PERFEVTSEL(bit), rdmsr(PERFEVTSEL(bit)) & ~PERFEVTSEL_EN); + } } static int -tprof_intel_nmi(const struct trapframe *tf, void *dummy) +tprof_intel_nmi(const struct trapframe *tf, void *arg) { - uint32_t pcint; - uint64_t ctr; + tprof_backend_softc_t *sc = arg; tprof_frame_info_t tfi; + uint32_t pcint; + int bit; - KASSERT(dummy == NULL); - - ctr = rdmsr(MSR_PERFCTR0); - /* If the highest bit is non zero, then it's not for us. 
*/ - if ((ctr & __BIT(counter_bitwidth-1)) != 0) { - return 0; - } + uint64_t *counters_offset = + percpu_getptr_remote(sc->sc_ctr_offset_percpu, curcpu()); + tprof_countermask_t mask = sc->sc_ctr_ovf_mask; + while ((bit = ffs(mask)) != 0) { + bit--; + CLR(mask, __BIT(bit)); + + /* If the highest bit is non zero, then it's not for us. */ + uint64_t ctr = tprof_intel_counter_read(bit); + if ((ctr & __BIT(counter_bitwidth - 1)) != 0) + continue; /* not overflowed */ + + if (ISSET(sc->sc_ctr_prof_mask, __BIT(bit))) { + /* account for the counter, and reset */ + tprof_intel_counter_write(bit, + sc->sc_count[bit].ctr_counter_reset_val); + counters_offset[bit] += + sc->sc_count[bit].ctr_counter_val + ctr; - /* record a sample */ + /* record a sample */ #if defined(__x86_64__) - tfi.tfi_pc = tf->tf_rip; + tfi.tfi_pc = tf->tf_rip; #else - tfi.tfi_pc = tf->tf_eip; + tfi.tfi_pc = tf->tf_eip; #endif - tfi.tfi_inkernel = tfi.tfi_pc >= VM_MIN_KERNEL_ADDRESS; - tprof_sample(NULL, &tfi); - - /* reset counter */ - wrmsr(MSR_PERFCTR0, counter_reset_val); + tfi.tfi_counter = bit; + tfi.tfi_inkernel = tfi.tfi_pc >= VM_MIN_KERNEL_ADDRESS; + tprof_sample(NULL, &tfi); + } else { + /* not profiled, but require to consider overflow */ + counters_offset[bit] += __BIT(counter_bitwidth); + } + } /* unmask PMI */ pcint = lapic_readreg(LAPIC_LVT_PCINT); @@ -166,16 +215,9 @@ tprof_intel_nmi(const struct trapframe * } static uint64_t -tprof_intel_estimate_freq(void) +tprof_intel_counter_estimate_freq(u_int counter) { - uint64_t cpufreq = curcpu()->ci_data.cpu_cc_freq; - uint64_t freq = 10000; - - counter_val = cpufreq / freq; - if (counter_val == 0) { - counter_val = UINT64_C(4000000000) / freq; - } - return freq; + return curcpu()->ci_data.cpu_cc_freq; } static uint32_t @@ -203,8 +245,25 @@ tprof_intel_ident(void) return TPROF_IDENT_INTEL_GENERIC; } +static void +tprof_intel_establish_cpu(void *arg1, void *arg2) +{ + struct cpu_info * const ci = curcpu(); + + intel_lapic_saved[cpu_index(ci)] = 
lapic_readreg(LAPIC_LVT_PCINT); + lapic_writereg(LAPIC_LVT_PCINT, LAPIC_DLMODE_NMI); +} + +static void +tprof_intel_disestablish_cpu(void *arg1, void *arg2) +{ + struct cpu_info * const ci = curcpu(); + + lapic_writereg(LAPIC_LVT_PCINT, intel_lapic_saved[cpu_index(ci)]); +} + static int -tprof_intel_start(const tprof_param_t *param) +tprof_intel_establish(tprof_backend_softc_t *sc) { uint64_t xc; @@ -213,23 +272,20 @@ tprof_intel_start(const tprof_param_t *p } KASSERT(intel_nmi_handle == NULL); - intel_nmi_handle = nmi_establish(tprof_intel_nmi, NULL); - - counter_reset_val = - counter_val + 1; - memcpy(&intel_param, param, sizeof(*param)); + intel_nmi_handle = nmi_establish(tprof_intel_nmi, sc); - xc = xc_broadcast(0, tprof_intel_start_cpu, NULL, NULL); + xc = xc_broadcast(0, tprof_intel_establish_cpu, sc, NULL); xc_wait(xc); return 0; } static void -tprof_intel_stop(const tprof_param_t *param) +tprof_intel_disestablish(tprof_backend_softc_t *sc) { uint64_t xc; - xc = xc_broadcast(0, tprof_intel_stop_cpu, NULL, NULL); + xc = xc_broadcast(0, tprof_intel_disestablish_cpu, sc, NULL); xc_wait(xc); KASSERT(intel_nmi_handle != NULL); @@ -238,8 +294,15 @@ tprof_intel_stop(const tprof_param_t *pa } const tprof_backend_ops_t tprof_intel_ops = { - .tbo_estimate_freq = tprof_intel_estimate_freq, .tbo_ident = tprof_intel_ident, + .tbo_ncounters = tprof_intel_ncounters, + .tbo_counter_bitwidth = tprof_intel_counter_bitwidth, + .tbo_counter_read = tprof_intel_counter_read, + .tbo_counter_estimate_freq = tprof_intel_counter_estimate_freq, + .tbo_valid_event = NULL, + .tbo_configure_event = tprof_intel_configure_event, .tbo_start = tprof_intel_start, .tbo_stop = tprof_intel_stop, + .tbo_establish = tprof_intel_establish, + .tbo_disestablish = tprof_intel_disestablish, }; Index: src/sys/dev/tprof/tprof_types.h diff -u src/sys/dev/tprof/tprof_types.h:1.5 src/sys/dev/tprof/tprof_types.h:1.6 --- src/sys/dev/tprof/tprof_types.h:1.5 Sun Jul 15 23:46:25 2018 +++ 
src/sys/dev/tprof/tprof_types.h Thu Dec 1 00:32:52 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: tprof_types.h,v 1.5 2018/07/15 23:46:25 jmcneill Exp $ */ +/* $NetBSD: tprof_types.h,v 1.6 2022/12/01 00:32:52 ryo Exp $ */ /*- * Copyright (c)2010,2011 YAMAMOTO Takashi, @@ -39,26 +39,55 @@ #include <stdint.h> #endif +#define TPROF_MAXCOUNTERS 32 +typedef uint32_t tprof_countermask_t; +#define TPROF_COUNTERMASK_ALL __BITS(31, 0) + typedef struct { uint32_t s_pid; /* process id */ uint32_t s_lwpid; /* lwp id */ uint32_t s_cpuid; /* cpu id */ - uint32_t s_flags; /* flags */ + uint32_t s_flags; /* flags and counterID */ +#define TPROF_SAMPLE_INKERNEL 0x00000001 /* s_pc is in kernel address space */ +#define TPROF_SAMPLE_COUNTER_MASK 0xff000000 /* 0..(TPROF_MAXCOUNTERS-1) */ uintptr_t s_pc; /* program counter */ } tprof_sample_t; typedef struct tprof_param { + u_int p_counter; /* 0..(TPROF_MAXCOUNTERS-1) */ + u_int p__unused; uint64_t p_event; /* event class */ uint64_t p_unit; /* unit within the event class */ uint64_t p_flags; +#define TPROF_PARAM_KERN 0x1 +#define TPROF_PARAM_USER 0x2 +#define TPROF_PARAM_PROFILE 0x4 +#define TPROF_PARAM_VALUE2_MASK __BITS(63, 60) +#define TPROF_PARAM_VALUE2_SCALE __SHIFTIN(1, TPROF_PARAM_VALUE2_MASK) +#define TPROF_PARAM_VALUE2_TRIGGERCOUNT __SHIFTIN(2, TPROF_PARAM_VALUE2_MASK) + uint64_t p_value; /* initial value */ + uint64_t p_value2; + /* + * p_value2 is an optional value. (p_flags & TPROF_PARAM_VALUE2_MASK) + * determines the usage. + * + * TPROF_PARAM_VALUE2_SCALE: + * Specify the counter speed as the reciprocal of the cycle counter + * speed ratio. if the counter is N times slower than the cycle + * counter, p_value2 is (0x1_0000_0000 / N). 0 is treated as 1.0. + * TPROF_PARAM_VALUE2_TRIGGERCOUNT: + * When the event counter counts up p_value2, an interrupt for profile + * is generated. 0 is treated as 1. 
+ */ } tprof_param_t; -/* s_flags */ -#define TPROF_SAMPLE_INKERNEL 1 /* s_pc is in kernel address space */ - -/* p_flags */ -#define TPROF_PARAM_KERN 0x01 -#define TPROF_PARAM_USER 0x02 +typedef struct tprof_counts { + uint32_t c_cpu; /* W */ + uint32_t c_ncounters; /* R */ + tprof_countermask_t c_runningmask; /* R */ + uint32_t c__unused; + uint64_t c_count[TPROF_MAXCOUNTERS]; /* R */ +} tprof_counts_t; /* ti_ident */ #define TPROF_IDENT_NONE 0x00 Index: src/sys/dev/tprof/tprof_x86_amd.c diff -u src/sys/dev/tprof/tprof_x86_amd.c:1.5 src/sys/dev/tprof/tprof_x86_amd.c:1.6 --- src/sys/dev/tprof/tprof_x86_amd.c:1.5 Fri Oct 11 18:04:52 2019 +++ src/sys/dev/tprof/tprof_x86_amd.c Thu Dec 1 00:32:52 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: tprof_x86_amd.c,v 1.5 2019/10/11 18:04:52 jmcneill Exp $ */ +/* $NetBSD: tprof_x86_amd.c,v 1.6 2022/12/01 00:32:52 ryo Exp $ */ /* * Copyright (c) 2018 The NetBSD Foundation, Inc. @@ -56,7 +56,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: tprof_x86_amd.c,v 1.5 2019/10/11 18:04:52 jmcneill Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tprof_x86_amd.c,v 1.6 2022/12/01 00:32:52 ryo Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -64,6 +64,7 @@ __KERNEL_RCSID(0, "$NetBSD: tprof_x86_am #include <sys/module.h> #include <sys/cpu.h> +#include <sys/percpu.h> #include <sys/xcall.h> #include <dev/tprof/tprof.h> @@ -78,7 +79,8 @@ __KERNEL_RCSID(0, "$NetBSD: tprof_x86_am #include <machine/i82489reg.h> #include <machine/i82489var.h> -#define NCTRS 4 +#define NCTRS 4 +#define COUNTER_BITWIDTH 48 #define PERFEVTSEL(i) (0xc0010000 + (i)) #define PERFCTR(i) (0xc0010004 + (i)) @@ -106,92 +108,128 @@ __KERNEL_RCSID(0, "$NetBSD: tprof_x86_am * http://developer.amd.com/wordpress/media/2012/10/Basic_Performance_Measurements.pdf */ -static int ctrno = 0; -static uint64_t counter_val = 5000000; -static uint64_t counter_reset_val; static uint32_t amd_lapic_saved[MAXCPUS]; static nmi_handler_t *amd_nmi_handle; -static tprof_param_t amd_param; + +static 
uint32_t +tprof_amd_ncounters(void) +{ + return NCTRS; +} + +static u_int +tprof_amd_counter_bitwidth(u_int counter) +{ + return COUNTER_BITWIDTH; +} + +static inline void +tprof_amd_counter_write(u_int counter, uint64_t val) +{ + wrmsr(PERFCTR(counter), val); +} + +static inline uint64_t +tprof_amd_counter_read(u_int counter) +{ + return rdmsr(PERFCTR(counter)); +} static void -tprof_amd_start_cpu(void *arg1, void *arg2) +tprof_amd_configure_event(u_int counter, const tprof_param_t *param) { - struct cpu_info * const ci = curcpu(); uint64_t pesr; uint64_t event_lo; uint64_t event_hi; - event_hi = amd_param.p_event >> 8; - event_lo = amd_param.p_event & 0xff; + event_hi = param->p_event >> 8; + event_lo = param->p_event & 0xff; pesr = - ((amd_param.p_flags & TPROF_PARAM_USER) ? PESR_USR : 0) | - ((amd_param.p_flags & TPROF_PARAM_KERN) ? PESR_OS : 0) | + ((param->p_flags & TPROF_PARAM_USER) ? PESR_USR : 0) | + ((param->p_flags & TPROF_PARAM_KERN) ? PESR_OS : 0) | PESR_INT | __SHIFTIN(event_lo, PESR_EVENT_MASK_LO) | __SHIFTIN(event_hi, PESR_EVENT_MASK_HI) | __SHIFTIN(0, PESR_COUNTER_MASK) | - __SHIFTIN(amd_param.p_unit, PESR_UNIT_MASK); + __SHIFTIN(param->p_unit, PESR_UNIT_MASK); + wrmsr(PERFEVTSEL(counter), pesr); - wrmsr(PERFCTR(ctrno), counter_reset_val); - wrmsr(PERFEVTSEL(ctrno), pesr); + /* reset the counter */ + tprof_amd_counter_write(counter, param->p_value); +} - amd_lapic_saved[cpu_index(ci)] = lapic_readreg(LAPIC_LVT_PCINT); - lapic_writereg(LAPIC_LVT_PCINT, LAPIC_DLMODE_NMI); +static void +tprof_amd_start(tprof_countermask_t runmask) +{ + int bit; - wrmsr(PERFEVTSEL(ctrno), pesr | PESR_EN); + while ((bit = ffs(runmask)) != 0) { + bit--; + CLR(runmask, __BIT(bit)); + wrmsr(PERFEVTSEL(bit), rdmsr(PERFEVTSEL(bit)) | PESR_EN); + } } static void -tprof_amd_stop_cpu(void *arg1, void *arg2) +tprof_amd_stop(tprof_countermask_t stopmask) { - struct cpu_info * const ci = curcpu(); + int bit; - wrmsr(PERFEVTSEL(ctrno), 0); - - lapic_writereg(LAPIC_LVT_PCINT, 
amd_lapic_saved[cpu_index(ci)]); + while ((bit = ffs(stopmask)) != 0) { + bit--; + CLR(stopmask, __BIT(bit)); + wrmsr(PERFEVTSEL(bit), rdmsr(PERFEVTSEL(bit)) & ~PESR_EN); + } } static int -tprof_amd_nmi(const struct trapframe *tf, void *dummy) +tprof_amd_nmi(const struct trapframe *tf, void *arg) { + tprof_backend_softc_t *sc = arg; tprof_frame_info_t tfi; - uint64_t ctr; - - KASSERT(dummy == NULL); + int bit; - /* check if it's for us */ - ctr = rdmsr(PERFCTR(ctrno)); - if ((ctr & (UINT64_C(1) << 63)) != 0) { /* check if overflowed */ - /* not ours */ - return 0; - } + uint64_t *counters_offset = + percpu_getptr_remote(sc->sc_ctr_offset_percpu, curcpu()); + tprof_countermask_t mask = sc->sc_ctr_ovf_mask; + while ((bit = ffs(mask)) != 0) { + bit--; + CLR(mask, __BIT(bit)); + + /* If the highest bit is non zero, then it's not for us. */ + uint64_t ctr = tprof_amd_counter_read(bit); + if ((ctr & __BIT(COUNTER_BITWIDTH - 1)) != 0) + continue; /* not overflowed */ + + if (ISSET(sc->sc_ctr_prof_mask, __BIT(bit))) { + /* account for the counter, and reset */ + tprof_amd_counter_write(bit, + sc->sc_count[bit].ctr_counter_reset_val); + counters_offset[bit] += + sc->sc_count[bit].ctr_counter_val + ctr; - /* record a sample */ + /* record a sample */ #if defined(__x86_64__) - tfi.tfi_pc = tf->tf_rip; + tfi.tfi_pc = tf->tf_rip; #else - tfi.tfi_pc = tf->tf_eip; + tfi.tfi_pc = tf->tf_eip; #endif - tfi.tfi_inkernel = tfi.tfi_pc >= VM_MIN_KERNEL_ADDRESS; - tprof_sample(NULL, &tfi); - - /* reset counter */ - wrmsr(PERFCTR(ctrno), counter_reset_val); + tfi.tfi_counter = bit; + tfi.tfi_inkernel = tfi.tfi_pc >= VM_MIN_KERNEL_ADDRESS; + tprof_sample(NULL, &tfi); + } else { + /* not profiled, but require to consider overflow */ + counters_offset[bit] += __BIT(COUNTER_BITWIDTH); + } + } return 1; } static uint64_t -tprof_amd_estimate_freq(void) +tprof_amd_counter_estimate_freq(u_int counter) { - uint64_t cpufreq = curcpu()->ci_data.cpu_cc_freq; - uint64_t freq = 10000; - - counter_val = 
cpufreq / freq; - if (counter_val == 0) { - counter_val = UINT64_C(4000000000) / freq; - } - return freq; + return curcpu()->ci_data.cpu_cc_freq; } static uint32_t @@ -213,8 +251,25 @@ tprof_amd_ident(void) return TPROF_IDENT_NONE; } +static void +tprof_amd_establish_cpu(void *arg1, void *arg2) +{ + struct cpu_info * const ci = curcpu(); + + amd_lapic_saved[cpu_index(ci)] = lapic_readreg(LAPIC_LVT_PCINT); + lapic_writereg(LAPIC_LVT_PCINT, LAPIC_DLMODE_NMI); +} + +static void +tprof_amd_disestablish_cpu(void *arg1, void *arg2) +{ + struct cpu_info * const ci = curcpu(); + + lapic_writereg(LAPIC_LVT_PCINT, amd_lapic_saved[cpu_index(ci)]); +} + static int -tprof_amd_start(const tprof_param_t *param) +tprof_amd_establish(tprof_backend_softc_t *sc) { uint64_t xc; @@ -223,23 +278,20 @@ tprof_amd_start(const tprof_param_t *par } KASSERT(amd_nmi_handle == NULL); - amd_nmi_handle = nmi_establish(tprof_amd_nmi, NULL); - - counter_reset_val = - counter_val + 1; - memcpy(&amd_param, param, sizeof(*param)); + amd_nmi_handle = nmi_establish(tprof_amd_nmi, sc); - xc = xc_broadcast(0, tprof_amd_start_cpu, NULL, NULL); + xc = xc_broadcast(0, tprof_amd_establish_cpu, sc, NULL); xc_wait(xc); return 0; } static void -tprof_amd_stop(const tprof_param_t *param) +tprof_amd_disestablish(tprof_backend_softc_t *sc) { uint64_t xc; - xc = xc_broadcast(0, tprof_amd_stop_cpu, NULL, NULL); + xc = xc_broadcast(0, tprof_amd_disestablish_cpu, sc, NULL); xc_wait(xc); KASSERT(amd_nmi_handle != NULL); @@ -248,8 +300,15 @@ tprof_amd_stop(const tprof_param_t *para } const tprof_backend_ops_t tprof_amd_ops = { - .tbo_estimate_freq = tprof_amd_estimate_freq, .tbo_ident = tprof_amd_ident, + .tbo_ncounters = tprof_amd_ncounters, + .tbo_counter_bitwidth = tprof_amd_counter_bitwidth, + .tbo_counter_read = tprof_amd_counter_read, + .tbo_counter_estimate_freq = tprof_amd_counter_estimate_freq, + .tbo_valid_event = NULL, + .tbo_configure_event = tprof_amd_configure_event, .tbo_start = tprof_amd_start, .tbo_stop 
= tprof_amd_stop, + .tbo_establish = tprof_amd_establish, + .tbo_disestablish = tprof_amd_disestablish, }; Index: src/sys/dev/tprof/tprof_x86.c diff -u src/sys/dev/tprof/tprof_x86.c:1.1 src/sys/dev/tprof/tprof_x86.c:1.2 --- src/sys/dev/tprof/tprof_x86.c:1.1 Tue Jul 24 09:47:35 2018 +++ src/sys/dev/tprof/tprof_x86.c Thu Dec 1 00:32:52 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: tprof_x86.c,v 1.1 2018/07/24 09:47:35 maxv Exp $ */ +/* $NetBSD: tprof_x86.c,v 1.2 2022/12/01 00:32:52 ryo Exp $ */ /* * Copyright (c) 2018 The NetBSD Foundation, Inc. @@ -30,7 +30,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: tprof_x86.c,v 1.1 2018/07/24 09:47:35 maxv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tprof_x86.c,v 1.2 2022/12/01 00:32:52 ryo Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -54,16 +54,28 @@ extern const tprof_backend_ops_t tprof_i static int tprof_x86_init(void) { + const tprof_backend_ops_t *ops; + const char *name; + int ncounters; + switch (cpu_vendor) { case CPUVENDOR_AMD: - return tprof_backend_register("tprof_amd", &tprof_amd_ops, - TPROF_BACKEND_VERSION); + name = "tprof_amd"; + ops = &tprof_amd_ops; + break; case CPUVENDOR_INTEL: - return tprof_backend_register("tprof_intel", &tprof_intel_ops, - TPROF_BACKEND_VERSION); + name = "tprof_intel"; + ops = &tprof_intel_ops; + break; default: return ENOTSUP; } + + ncounters = ops->tbo_ncounters(); + if (ncounters == 0) + return ENOTSUP; + + return tprof_backend_register(name, ops, TPROF_BACKEND_VERSION); } static int Index: src/usr.sbin/tprof/tprof.8 diff -u src/usr.sbin/tprof/tprof.8:1.16 src/usr.sbin/tprof/tprof.8:1.17 --- src/usr.sbin/tprof/tprof.8:1.16 Wed May 25 06:17:19 2022 +++ src/usr.sbin/tprof/tprof.8 Thu Dec 1 00:32:52 2022 @@ -1,4 +1,4 @@ -.\" $NetBSD: tprof.8,v 1.16 2022/05/25 06:17:19 msaitoh Exp $ +.\" $NetBSD: tprof.8,v 1.17 2022/12/01 00:32:52 ryo Exp $ .\" .\" Copyright (c)2011 YAMAMOTO Takashi, .\" All rights reserved. 
@@ -24,7 +24,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd October 11, 2019 +.Dd December 1, 2022 .Dt TPROF 8 .Os .Sh NAME @@ -67,6 +67,7 @@ Display a list of performance counter ev .It monitor Xo .Fl e .Ar name:option +.Op Fl e Ar ... .Op Fl o Ar outfile .Ar command .Xc Index: src/usr.sbin/tprof/tprof.c diff -u src/usr.sbin/tprof/tprof.c:1.13 src/usr.sbin/tprof/tprof.c:1.14 --- src/usr.sbin/tprof/tprof.c:1.13 Tue Jul 24 09:50:37 2018 +++ src/usr.sbin/tprof/tprof.c Thu Dec 1 00:32:52 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: tprof.c,v 1.13 2018/07/24 09:50:37 maxv Exp $ */ +/* $NetBSD: tprof.c,v 1.14 2022/12/01 00:32:52 ryo Exp $ */ /* * Copyright (c) 2018 The NetBSD Foundation, Inc. @@ -57,7 +57,7 @@ #include <sys/cdefs.h> #ifndef lint -__RCSID("$NetBSD: tprof.c,v 1.13 2018/07/24 09:50:37 maxv Exp $"); +__RCSID("$NetBSD: tprof.c,v 1.14 2022/12/01 00:32:52 ryo Exp $"); #endif /* not lint */ #include <sys/ioctl.h> @@ -80,8 +80,11 @@ __RCSID("$NetBSD: tprof.c,v 1.13 2018/07 #define _PATH_TPROF "/dev/tprof" +struct tprof_info tprof_info; +u_int ncounters; int devfd; int outfd; +u_int nevent; static void tprof_list(int, char **); static void tprof_monitor(int, char **) __dead; @@ -106,7 +109,7 @@ usage(void) fprintf(stderr, "\n"); fprintf(stderr, "\tlist\n"); fprintf(stderr, "\t\tList the available events.\n"); - fprintf(stderr, "\tmonitor -e name:option [-o outfile] command\n"); + fprintf(stderr, "\tmonitor -e name:option [-e ...] 
[-o outfile] command\n"); fprintf(stderr, "\t\tMonitor the event 'name' with option 'option'\n" "\t\tcounted during the execution of 'command'.\n"); fprintf(stderr, "\tanalyze [-CkLPs] [-p pid] file\n"); @@ -156,14 +159,15 @@ static void tprof_monitor(int argc, char **argv) { const char *outfile = "tprof.out"; - struct tprof_param param; struct tprof_stat ts; + tprof_param_t params[TPROF_MAXCOUNTERS]; pid_t pid; pthread_t pt; - int ret, ch; + int ret, ch, i; char *tokens[2]; + tprof_countermask_t mask = TPROF_COUNTERMASK_ALL; - memset(¶m, 0, sizeof(param)); + memset(params, 0, sizeof(params)); while ((ch = getopt(argc, argv, "o:e:")) != -1) { switch (ch) { @@ -175,11 +179,17 @@ tprof_monitor(int argc, char **argv) tokens[1] = strtok(NULL, ":"); if (tokens[1] == NULL) usage(); - tprof_event_lookup(tokens[0], ¶m); + tprof_event_lookup(tokens[0], ¶ms[nevent]); if (strchr(tokens[1], 'u')) - param.p_flags |= TPROF_PARAM_USER; + params[nevent].p_flags |= TPROF_PARAM_USER; if (strchr(tokens[1], 'k')) - param.p_flags |= TPROF_PARAM_KERN; + params[nevent].p_flags |= TPROF_PARAM_KERN; + if (params[nevent].p_flags == 0) + usage(); + nevent++; + if (nevent > __arraycount(params) || + nevent > ncounters) + errx(EXIT_FAILURE, "Too many events"); break; default: usage(); @@ -187,11 +197,7 @@ tprof_monitor(int argc, char **argv) } argc -= optind; argv += optind; - if (argc == 0) { - usage(); - } - - if (param.p_flags == 0) { + if (argc == 0 || nevent == 0) { usage(); } @@ -200,7 +206,15 @@ tprof_monitor(int argc, char **argv) err(EXIT_FAILURE, "%s", outfile); } - ret = ioctl(devfd, TPROF_IOC_START, ¶m); + for (i = 0; i < (int)nevent; i++) { + params[i].p_counter = i; + params[i].p_flags |= TPROF_PARAM_PROFILE; + ret = ioctl(devfd, TPROF_IOC_CONFIGURE_EVENT, ¶ms[i]); + if (ret == -1) + err(EXIT_FAILURE, "TPROF_IOC_CONFIGURE_EVENT"); + } + + ret = ioctl(devfd, TPROF_IOC_START, &mask); if (ret == -1) { err(EXIT_FAILURE, "TPROF_IOC_START"); } @@ -237,7 +251,7 @@ tprof_monitor(int 
argc, char **argv) } } - ret = ioctl(devfd, TPROF_IOC_STOP, NULL); + ret = ioctl(devfd, TPROF_IOC_STOP, &mask); if (ret == -1) { err(EXIT_FAILURE, "TPROF_IOC_STOP"); } @@ -263,7 +277,6 @@ tprof_monitor(int argc, char **argv) int main(int argc, char *argv[]) { - struct tprof_info info; const struct cmdtab *ct; int ret; @@ -275,18 +288,26 @@ main(int argc, char *argv[]) err(EXIT_FAILURE, "%s", _PATH_TPROF); } - ret = ioctl(devfd, TPROF_IOC_GETINFO, &info); + ret = ioctl(devfd, TPROF_IOC_GETINFO, &tprof_info); if (ret == -1) { err(EXIT_FAILURE, "TPROF_IOC_GETINFO"); } - if (info.ti_version != TPROF_VERSION) { + if (tprof_info.ti_version != TPROF_VERSION) { errx(EXIT_FAILURE, "version mismatch: version=%d, expected=%d", - info.ti_version, TPROF_VERSION); + tprof_info.ti_version, TPROF_VERSION); } - if (tprof_event_init(info.ti_ident) == -1) { + if (tprof_event_init(tprof_info.ti_ident) == -1) { errx(EXIT_FAILURE, "cpu not supported"); } + ret = ioctl(devfd, TPROF_IOC_GETNCOUNTERS, &ncounters); + if (ret == -1) { + err(EXIT_FAILURE, "TPROF_IOC_GETNCOUNTERS"); + } + if (ncounters == 0) { + errx(EXIT_FAILURE, "no available counters"); + } + if (argc == 0) usage(); Index: src/usr.sbin/tprof/tprof_analyze.c diff -u src/usr.sbin/tprof/tprof_analyze.c:1.5 src/usr.sbin/tprof/tprof_analyze.c:1.6 --- src/usr.sbin/tprof/tprof_analyze.c:1.5 Thu Oct 14 09:52:40 2021 +++ src/usr.sbin/tprof/tprof_analyze.c Thu Dec 1 00:32:52 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: tprof_analyze.c,v 1.5 2021/10/14 09:52:40 skrll Exp $ */ +/* $NetBSD: tprof_analyze.c,v 1.6 2022/12/01 00:32:52 ryo Exp $ */ /* * Copyright (c) 2010,2011,2012 YAMAMOTO Takashi, @@ -28,7 +28,7 @@ #include <sys/cdefs.h> #ifndef lint -__RCSID("$NetBSD: tprof_analyze.c,v 1.5 2021/10/14 09:52:40 skrll Exp $"); +__RCSID("$NetBSD: tprof_analyze.c,v 1.6 2022/12/01 00:32:52 ryo Exp $"); #endif /* not lint */ #include <assert.h> @@ -63,6 +63,7 @@ struct addr { uint32_t cpuid; /* cpu id */ bool in_kernel; /* if addr is in the kernel address 
space */ unsigned int nsamples; /* number of samples taken for the address */ + unsigned int ncount[TPROF_MAXCOUNTERS]; /* count per event */ }; static rb_tree_t addrtree; @@ -278,6 +279,7 @@ tprof_analyze(int argc, char **argv) size_t naddrs, nsamples, i; float perc; int ch; + u_int c, maxevent = 0; bool distinguish_processes = true; bool distinguish_cpus = true; bool distinguish_lwps = true; @@ -363,6 +365,7 @@ tprof_analyze(int argc, char **argv) continue; } a = emalloc(sizeof(*a)); + memset(a, 0, sizeof(*a)); a->addr = (uint64_t)sample.s_pc; if (distinguish_processes) { a->pid = sample.s_pid; @@ -389,7 +392,13 @@ tprof_analyze(int argc, char **argv) a->addr -= offset; } } + c = __SHIFTOUT(sample.s_flags, TPROF_SAMPLE_COUNTER_MASK); + assert(c < TPROF_MAXCOUNTERS); + if (maxevent < c) + maxevent = c; + a->nsamples = 1; + a->ncount[c] = 1; o = rb_tree_insert_node(&addrtree, a); if (o != a) { assert(a->addr == o->addr); @@ -398,7 +407,9 @@ tprof_analyze(int argc, char **argv) assert(a->cpuid == o->cpuid); assert(a->in_kernel == o->in_kernel); free(a); + o->nsamples++; + o->ncount[c]++; } else { naddrs++; } @@ -423,8 +434,17 @@ tprof_analyze(int argc, char **argv) */ printf("File: %s\n", argv[0]); printf("Number of samples: %zu\n\n", nsamples); - printf("percentage nsamples pid lwp cpu k address symbol\n"); - printf("------------ -------- ------ ------ ---- - ---------------- ------\n"); + + printf("percentage nsamples "); + for (c = 0; c <= maxevent; c++) + printf("event#%02u ", c); + printf("pid lwp cpu k address symbol\n"); + + printf("------------ -------- "); + for (c = 0; c <= maxevent; c++) + printf("-------- "); + + printf("------ ------ ---- - ---------------- ------\n"); for (i = 0; i < naddrs; i++) { const char *name; char buf[100]; @@ -448,11 +468,17 @@ tprof_analyze(int argc, char **argv) perc = ((float)a->nsamples / (float)nsamples) * 100.0; - printf("%11f%% %8u %6" PRIu32 " %6" PRIu32 " %4" PRIu32 " %u %016" - PRIx64 " %s\n", - perc, - a->nsamples, 
a->pid, a->lwpid, a->cpuid, a->in_kernel, - a->addr, name); + printf("%11f%% %8u", perc, a->nsamples); + + for (c = 0; c <= maxevent; c++) + printf(" %8u", a->ncount[c]); + + printf(" %6" PRIu32 " %6" PRIu32 " %4" PRIu32 " %u %016" + PRIx64" %s", + a->pid, a->lwpid, a->cpuid, a->in_kernel, a->addr, name); + + + printf("\n"); } fclose(f);