Module Name: src Committed By: maxv Date: Fri Jul 13 07:56:29 UTC 2018
Modified Files: src/sys/arch/x86/x86: tprof_amdpmi.c tprof_pmi.c src/sys/dev/tprof: tprof.c tprof.h tprof_ioctl.h tprof_types.h src/usr.sbin/tprof: Makefile tprof.8 tprof.c Added Files: src/usr.sbin/tprof: tprof.h src/usr.sbin/tprof/arch: tprof_noarch.c tprof_x86.c Removed Files: src/usr.sbin/tprof: README tpann.sh tpfmt.sh Log Message: Revamp tprof. Rewrite the Intel backend to use the generic PMC interface, which is available on all Intel CPUs. Synchronize the AMD backend with the new interface. The kernel identifies the PMC interface, and gives its id to userland. Userland then queries the events itself (via cpuid etc). These events depend on the PMC interface. The tprof utility is rewritten to allow the user to choose which event to count (which was not possible until now, the event was hardcoded in the backend). The command line format is based on usr.bin/pmc, eg: tprof -e llc-misses:k -o output sleep 20 The man page is updated too, but the arguments will likely change soon anyway so it doesn't matter a lot. The tprof utility has three tables: Intel Architectural Version 1 Intel Skylake/Kabylake AMD Family 10h A CPU can support a combination of tables. For example Kabylake has Intel-Architectural-Version-1 and its own Intel-Kabylake table. For now the Intel Skylake/Kabylake table contains only one event, just to demonstrate that the combination of tables works. Tested on an Intel Core i5 Kabylake. The code for AMD Family 10h is taken from the code I had written for usr.bin/pmc. I haven't tested it yet, but it's the same as pmc(1), so I guess it works as-is. The whole thing is written in such a way that (I think) it is not complicated to add more CPU models, and more architectures (other than x86). 
To generate a diff of this commit: cvs rdiff -u -r1.7 -r1.8 src/sys/arch/x86/x86/tprof_amdpmi.c cvs rdiff -u -r1.14 -r1.15 src/sys/arch/x86/x86/tprof_pmi.c cvs rdiff -u -r1.13 -r1.14 src/sys/dev/tprof/tprof.c cvs rdiff -u -r1.5 -r1.6 src/sys/dev/tprof/tprof.h cvs rdiff -u -r1.3 -r1.4 src/sys/dev/tprof/tprof_ioctl.h cvs rdiff -u -r1.2 -r1.3 src/sys/dev/tprof/tprof_types.h cvs rdiff -u -r1.3 -r1.4 src/usr.sbin/tprof/Makefile \ src/usr.sbin/tprof/tprof.8 cvs rdiff -u -r1.9 -r0 src/usr.sbin/tprof/README cvs rdiff -u -r1.2 -r0 src/usr.sbin/tprof/tpann.sh cvs rdiff -u -r1.3 -r0 src/usr.sbin/tprof/tpfmt.sh cvs rdiff -u -r1.5 -r1.6 src/usr.sbin/tprof/tprof.c cvs rdiff -u -r0 -r1.1 src/usr.sbin/tprof/tprof.h cvs rdiff -u -r0 -r1.1 src/usr.sbin/tprof/arch/tprof_noarch.c \ src/usr.sbin/tprof/arch/tprof_x86.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/arch/x86/x86/tprof_amdpmi.c diff -u src/sys/arch/x86/x86/tprof_amdpmi.c:1.7 src/sys/arch/x86/x86/tprof_amdpmi.c:1.8 --- src/sys/arch/x86/x86/tprof_amdpmi.c:1.7 Tue May 23 08:54:39 2017 +++ src/sys/arch/x86/x86/tprof_amdpmi.c Fri Jul 13 07:56:29 2018 @@ -1,6 +1,35 @@ -/* $NetBSD: tprof_amdpmi.c,v 1.7 2017/05/23 08:54:39 nonaka Exp $ */ +/* $NetBSD: tprof_amdpmi.c,v 1.8 2018/07/13 07:56:29 maxv Exp $ */ -/*- +/* + * Copyright (c) 2018 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Maxime Villard. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* * Copyright (c)2008,2009 YAMAMOTO Takashi, * All rights reserved. * @@ -27,7 +56,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: tprof_amdpmi.c,v 1.7 2017/05/23 08:54:39 nonaka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tprof_amdpmi.c,v 1.8 2018/07/13 07:56:29 maxv Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -78,66 +107,53 @@ __KERNEL_RCSID(0, "$NetBSD: tprof_amdpmi * http://developer.amd.com/wordpress/media/2012/10/Basic_Performance_Measurements.pdf */ -/* Event flags - abbreviations as found in the documents */ -#define CPU_clocks__EVENT 0x76 -#define CPU_clocks__UNIT 0x00 -#define DC_refills_L2__EVENT 0x42 -#define DC_refills_L2__UNIT 0x1E -#define DC_refills_sys__EVENT 0x43 -#define DC_refills_sys__UNIT 0x1E - -/* - * Hardcode your counter here. There is no detection, so make sure it is - * supported by your CPU family. - */ -static uint32_t event = CPU_clocks__EVENT; -static uint32_t unit = CPU_clocks__UNIT; static int ctrno = 0; - static uint64_t counter_val = 5000000; static uint64_t counter_reset_val; -static uint32_t tprof_amdpmi_lapic_saved[MAXCPUS]; - -static nmi_handler_t *tprof_amdpmi_nmi_handle; -static tprof_backend_cookie_t *tprof_cookie; +static uint32_t amd_lapic_saved[MAXCPUS]; +static nmi_handler_t *amd_nmi_handle; +static tprof_param_t amd_param; static void -tprof_amdpmi_start_cpu(void *arg1, void *arg2) +tprof_amd_start_cpu(void *arg1, void *arg2) { struct cpu_info * const ci = curcpu(); uint64_t pesr; uint64_t event_lo; uint64_t event_hi; - event_hi = event >> 8; - event_lo = event & 0xff; - pesr = PESR_USR | PESR_OS | PESR_INT | + event_hi = amd_param.p_event >> 8; + event_lo = amd_param.p_event & 0xff; + pesr = + ((amd_param.p_flags & TPROF_PARAM_USER) ? PESR_USR : 0) | + ((amd_param.p_flags & TPROF_PARAM_KERN) ? 
PESR_OS : 0) | + PESR_INT | __SHIFTIN(event_lo, PESR_EVENT_MASK_LO) | __SHIFTIN(event_hi, PESR_EVENT_MASK_HI) | __SHIFTIN(0, PESR_COUNTER_MASK) | - __SHIFTIN(unit, PESR_UNIT_MASK); + __SHIFTIN(amd_param.p_unit, PESR_UNIT_MASK); wrmsr(PERFCTR(ctrno), counter_reset_val); wrmsr(PERFEVTSEL(ctrno), pesr); - tprof_amdpmi_lapic_saved[cpu_index(ci)] = lapic_readreg(LAPIC_PCINT); + amd_lapic_saved[cpu_index(ci)] = lapic_readreg(LAPIC_PCINT); lapic_writereg(LAPIC_PCINT, LAPIC_DLMODE_NMI); wrmsr(PERFEVTSEL(ctrno), pesr | PESR_EN); } static void -tprof_amdpmi_stop_cpu(void *arg1, void *arg2) +tprof_amd_stop_cpu(void *arg1, void *arg2) { struct cpu_info * const ci = curcpu(); wrmsr(PERFEVTSEL(ctrno), 0); - lapic_writereg(LAPIC_PCINT, tprof_amdpmi_lapic_saved[cpu_index(ci)]); + lapic_writereg(LAPIC_PCINT, amd_lapic_saved[cpu_index(ci)]); } static int -tprof_amdpmi_nmi(const struct trapframe *tf, void *dummy) +tprof_amd_nmi(const struct trapframe *tf, void *dummy) { tprof_frame_info_t tfi; uint64_t ctr; @@ -154,11 +170,11 @@ tprof_amdpmi_nmi(const struct trapframe /* record a sample */ #if defined(__x86_64__) tfi.tfi_pc = tf->tf_rip; -#else /* defined(__x86_64__) */ +#else tfi.tfi_pc = tf->tf_eip; -#endif /* defined(__x86_64__) */ +#endif tfi.tfi_inkernel = tfi.tfi_pc >= VM_MIN_KERNEL_ADDRESS; - tprof_sample(tprof_cookie, &tfi); + tprof_sample(NULL, &tfi); /* reset counter */ wrmsr(PERFCTR(ctrno), counter_reset_val); @@ -167,7 +183,7 @@ tprof_amdpmi_nmi(const struct trapframe } static uint64_t -tprof_amdpmi_estimate_freq(void) +tprof_amd_estimate_freq(void) { uint64_t cpufreq = curcpu()->ci_data.cpu_cc_freq; uint64_t freq = 10000; @@ -175,52 +191,66 @@ tprof_amdpmi_estimate_freq(void) counter_val = cpufreq / freq; if (counter_val == 0) { counter_val = UINT64_C(4000000000) / freq; - return freq; } return freq; } +static uint32_t +tprof_amd_ident(void) +{ + struct cpu_info *ci = curcpu(); + + if (cpu_vendor != CPUVENDOR_AMD) { + return TPROF_IDENT_NONE; + } + + switch 
(CPUID_TO_FAMILY(ci->ci_signature)) { + case 0x10: + return TPROF_IDENT_AMD_GENERIC; + } + + return TPROF_IDENT_NONE; +} + static int -tprof_amdpmi_start(tprof_backend_cookie_t *cookie) +tprof_amd_start(const tprof_param_t *param) { uint64_t xc; - if (cpu_vendor != CPUVENDOR_AMD) { + if (tprof_amd_ident() == TPROF_IDENT_NONE) { return ENOTSUP; } - KASSERT(tprof_amdpmi_nmi_handle == NULL); - tprof_amdpmi_nmi_handle = nmi_establish(tprof_amdpmi_nmi, NULL); + KASSERT(amd_nmi_handle == NULL); + amd_nmi_handle = nmi_establish(tprof_amd_nmi, NULL); counter_reset_val = - counter_val + 1; - xc = xc_broadcast(0, tprof_amdpmi_start_cpu, NULL, NULL); - xc_wait(xc); + memcpy(&amd_param, param, sizeof(*param)); - KASSERT(tprof_cookie == NULL); - tprof_cookie = cookie; + xc = xc_broadcast(0, tprof_amd_start_cpu, NULL, NULL); + xc_wait(xc); return 0; } static void -tprof_amdpmi_stop(tprof_backend_cookie_t *cookie) +tprof_amd_stop(const tprof_param_t *param) { uint64_t xc; - xc = xc_broadcast(0, tprof_amdpmi_stop_cpu, NULL, NULL); + xc = xc_broadcast(0, tprof_amd_stop_cpu, NULL, NULL); xc_wait(xc); - KASSERT(tprof_amdpmi_nmi_handle != NULL); - KASSERT(tprof_cookie == cookie); - nmi_disestablish(tprof_amdpmi_nmi_handle); - tprof_amdpmi_nmi_handle = NULL; - tprof_cookie = NULL; + KASSERT(amd_nmi_handle != NULL); + nmi_disestablish(amd_nmi_handle); + amd_nmi_handle = NULL; } -static const tprof_backend_ops_t tprof_amdpmi_ops = { - .tbo_estimate_freq = tprof_amdpmi_estimate_freq, - .tbo_start = tprof_amdpmi_start, - .tbo_stop = tprof_amdpmi_stop, +static const tprof_backend_ops_t tprof_amd_ops = { + .tbo_estimate_freq = tprof_amd_estimate_freq, + .tbo_ident = tprof_amd_ident, + .tbo_start = tprof_amd_start, + .tbo_stop = tprof_amd_stop, }; MODULE(MODULE_CLASS_DRIVER, tprof_amdpmi, "tprof"); @@ -231,7 +261,7 @@ tprof_amdpmi_modcmd(modcmd_t cmd, void * switch (cmd) { case MODULE_CMD_INIT: - return tprof_backend_register("tprof_amd", &tprof_amdpmi_ops, + return 
tprof_backend_register("tprof_amd", &tprof_amd_ops, TPROF_BACKEND_VERSION); case MODULE_CMD_FINI: Index: src/sys/arch/x86/x86/tprof_pmi.c diff -u src/sys/arch/x86/x86/tprof_pmi.c:1.14 src/sys/arch/x86/x86/tprof_pmi.c:1.15 --- src/sys/arch/x86/x86/tprof_pmi.c:1.14 Tue May 23 08:54:39 2017 +++ src/sys/arch/x86/x86/tprof_pmi.c Fri Jul 13 07:56:29 2018 @@ -1,6 +1,35 @@ -/* $NetBSD: tprof_pmi.c,v 1.14 2017/05/23 08:54:39 nonaka Exp $ */ +/* $NetBSD: tprof_pmi.c,v 1.15 2018/07/13 07:56:29 maxv Exp $ */ -/*- +/* + * Copyright (c) 2018 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Maxime Villard. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* * Copyright (c)2008,2009 YAMAMOTO Takashi, * All rights reserved. * @@ -27,7 +56,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: tprof_pmi.c,v 1.14 2017/05/23 08:54:39 nonaka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tprof_pmi.c,v 1.15 2018/07/13 07:56:29 maxv Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -46,157 +75,92 @@ __KERNEL_RCSID(0, "$NetBSD: tprof_pmi.c, #include <x86/nmi.h> #include <machine/cpufunc.h> -#include <machine/cputypes.h> /* CPUVENDER_* */ +#include <machine/cputypes.h> /* CPUVENDOR_* */ #include <machine/cpuvar.h> /* cpu_vendor */ #include <machine/i82489reg.h> #include <machine/i82489var.h> -#define ESCR_T1_USR __BIT(0) -#define ESCR_T1_OS __BIT(1) -#define ESCR_T0_USR __BIT(2) -#define ESCR_T0_OS __BIT(3) -#define ESCR_TAG_ENABLE __BIT(4) -#define ESCR_TAG_VALUE __BITS(5, 8) -#define ESCR_EVENT_MASK __BITS(9, 24) -#define ESCR_EVENT_SELECT __BITS(25, 30) - -#define CCCR_ENABLE __BIT(12) -#define CCCR_ESCR_SELECT __BITS(13, 15) -#define CCCR_MUST_BE_SET __BITS(16, 17) -#define CCCR_COMPARE __BIT(18) -#define CCCR_COMPLEMENT __BIT(19) -#define CCCR_THRESHOLD __BITS(20, 23) -#define CCCR_EDGE __BIT(24) -#define CCCR_FORCE_OVF __BIT(25) -#define CCCR_OVF_PMI_T0 __BIT(26) -#define CCCR_OVF_PMI_T1 __BIT(27) -#define CCCR_CASCADE __BIT(30) -#define CCCR_OVF __BIT(31) - -struct msrs { - u_int msr_cccr; - u_int msr_escr; - u_int msr_counter; -}; - -/* - * parameters (see 253669.pdf Table A-6) 
- * - * XXX should not hardcode - */ - -static const struct msrs msrs[] = { - { - .msr_cccr = 0x360, /* MSR_BPU_CCCR0 */ - .msr_escr = 0x3a2, /* MSR_FSB_ESCR0 */ - .msr_counter = 0x300, /* MSR_BPU_COUNTER0 */ - }, - { - .msr_cccr = 0x362, /* MSR_BPU_CCCR2 */ - .msr_escr = 0x3a3, /* MSR_FSB_ESCR1 */ - .msr_counter = 0x302, /* MSR_BPU_COUNTER2 */ - }, -}; -static const u_int cccr_escr_select = 0x6; /* MSR_FSB_ESCR? */ -static const u_int escr_event_select = 0x13; /* global_power_events */ -static const u_int escr_event_mask = 0x1; /* running */ +#define PERFEVTSEL_EVENT_SELECT __BITS(0, 7) +#define PERFEVTSEL_UNIT_MASK __BITS(8, 15) +#define PERFEVTSEL_USR __BIT(16) +#define PERFEVTSEL_OS __BIT(17) +#define PERFEVTSEL_E __BIT(18) +#define PERFEVTSEL_PC __BIT(19) +#define PERFEVTSEL_INT __BIT(20) +#define PERFEVTSEL_EN __BIT(22) +#define PERFEVTSEL_INV __BIT(23) +#define PERFEVTSEL_COUNTER_MASK __BITS(24, 31) + +#define CPUID_0A_VERSION __BITS(0, 7) +#define CPUID_0A_NCOUNTERS __BITS(8, 15) +#define CPUID_0A_BITWIDTH __BITS(16, 23) +static uint64_t counter_bitwidth; static uint64_t counter_val = 5000000; static uint64_t counter_reset_val; -static uint32_t tprof_pmi_lapic_saved[MAXCPUS]; -static nmi_handler_t *tprof_pmi_nmi_handle; -static tprof_backend_cookie_t *tprof_cookie; +static uint32_t intel_lapic_saved[MAXCPUS]; +static nmi_handler_t *intel_nmi_handle; +static tprof_param_t intel_param; static void -tprof_pmi_start_cpu(void *arg1, void *arg2) +tprof_intel_start_cpu(void *arg1, void *arg2) { struct cpu_info * const ci = curcpu(); - const struct msrs *msr; - uint64_t cccr; - uint64_t escr; - - if (ci->ci_smt_id >= 2) { - printf("%s: ignoring %s smt id=%u", - __func__, device_xname(ci->ci_dev), - (u_int)ci->ci_smt_id); - return; - } - msr = &msrs[ci->ci_smt_id]; - escr = __SHIFTIN(escr_event_mask, ESCR_EVENT_MASK) | - __SHIFTIN(escr_event_select, ESCR_EVENT_SELECT); - cccr = CCCR_ENABLE | __SHIFTIN(cccr_escr_select, CCCR_ESCR_SELECT) | - CCCR_MUST_BE_SET; - if 
(ci->ci_smt_id == 0) { - escr |= ESCR_T0_OS | ESCR_T0_USR; - cccr |= CCCR_OVF_PMI_T0; - } else { - escr |= ESCR_T1_OS | ESCR_T0_USR; - cccr |= CCCR_OVF_PMI_T1; - } + uint64_t evtval; + + evtval = + __SHIFTIN(intel_param.p_event, PERFEVTSEL_EVENT_SELECT) | + __SHIFTIN(intel_param.p_unit, PERFEVTSEL_UNIT_MASK) | + ((intel_param.p_flags & TPROF_PARAM_USER) ? PERFEVTSEL_USR : 0) | + ((intel_param.p_flags & TPROF_PARAM_KERN) ? PERFEVTSEL_OS : 0) | + PERFEVTSEL_INT | + PERFEVTSEL_EN; - wrmsr(msr->msr_counter, counter_reset_val); - wrmsr(msr->msr_escr, escr); - wrmsr(msr->msr_cccr, cccr); - tprof_pmi_lapic_saved[cpu_index(ci)] = lapic_readreg(LAPIC_PCINT); + wrmsr(MSR_PERFCTR0, counter_reset_val); + wrmsr(MSR_EVNTSEL0, evtval); + + intel_lapic_saved[cpu_index(ci)] = lapic_readreg(LAPIC_PCINT); lapic_writereg(LAPIC_PCINT, LAPIC_DLMODE_NMI); } static void -tprof_pmi_stop_cpu(void *arg1, void *arg2) +tprof_intel_stop_cpu(void *arg1, void *arg2) { struct cpu_info * const ci = curcpu(); - const struct msrs *msr; - if (ci->ci_smt_id >= 2) { - printf("%s: ignoring %s smt id=%u", - __func__, device_xname(ci->ci_dev), - (u_int)ci->ci_smt_id); - return; - } - msr = &msrs[ci->ci_smt_id]; + wrmsr(MSR_EVNTSEL0, 0); + wrmsr(MSR_PERFCTR0, 0); - wrmsr(msr->msr_escr, 0); - wrmsr(msr->msr_cccr, 0); - lapic_writereg(LAPIC_PCINT, tprof_pmi_lapic_saved[cpu_index(ci)]); + lapic_writereg(LAPIC_PCINT, intel_lapic_saved[cpu_index(ci)]); } static int -tprof_pmi_nmi(const struct trapframe *tf, void *dummy) +tprof_intel_nmi(const struct trapframe *tf, void *dummy) { - struct cpu_info * const ci = curcpu(); - const struct msrs *msr; uint32_t pcint; - uint64_t cccr; + uint64_t ctr; tprof_frame_info_t tfi; KASSERT(dummy == NULL); - if (ci->ci_smt_id >= 2) { - /* not ours */ - return 0; - } - msr = &msrs[ci->ci_smt_id]; - - /* check if it's for us */ - cccr = rdmsr(msr->msr_cccr); - if ((cccr & CCCR_OVF) == 0) { - /* not ours */ + ctr = rdmsr(MSR_PERFCTR0); + /* If the highest bit is non zero, then it's 
not for us. */ + if ((ctr & __BIT(counter_bitwidth-1)) != 0) { return 0; } /* record a sample */ #if defined(__x86_64__) tfi.tfi_pc = tf->tf_rip; -#else /* defined(__x86_64__) */ +#else tfi.tfi_pc = tf->tf_eip; -#endif /* defined(__x86_64__) */ +#endif tfi.tfi_inkernel = tfi.tfi_pc >= VM_MIN_KERNEL_ADDRESS; - tprof_sample(tprof_cookie, &tfi); + tprof_sample(NULL, &tfi); /* reset counter */ - wrmsr(msr->msr_counter, counter_reset_val); - wrmsr(msr->msr_cccr, cccr & ~CCCR_OVF); + wrmsr(MSR_PERFCTR0, counter_reset_val); /* unmask PMI */ pcint = lapic_readreg(LAPIC_PCINT); @@ -207,7 +171,7 @@ tprof_pmi_nmi(const struct trapframe *tf } static uint64_t -tprof_pmi_estimate_freq(void) +tprof_intel_estimate_freq(void) { uint64_t cpufreq = curcpu()->ci_data.cpu_cc_freq; uint64_t freq = 10000; @@ -215,54 +179,74 @@ tprof_pmi_estimate_freq(void) counter_val = cpufreq / freq; if (counter_val == 0) { counter_val = UINT64_C(4000000000) / freq; - return freq; } return freq; } +static uint32_t +tprof_intel_ident(void) +{ + uint32_t descs[4]; + + if (cpu_vendor != CPUVENDOR_INTEL) { + return TPROF_IDENT_NONE; + } + + if (cpuid_level < 0x0A) { + return TPROF_IDENT_NONE; + } + x86_cpuid(0x0A, descs); + if ((descs[0] & CPUID_0A_VERSION) == 0) { + return TPROF_IDENT_NONE; + } + if ((descs[0] & CPUID_0A_NCOUNTERS) == 0) { + return TPROF_IDENT_NONE; + } + + counter_bitwidth = __SHIFTOUT(descs[0], CPUID_0A_BITWIDTH); + + return TPROF_IDENT_INTEL_GENERIC; +} + static int -tprof_pmi_start(tprof_backend_cookie_t *cookie) +tprof_intel_start(const tprof_param_t *param) { - struct cpu_info * const ci = curcpu(); uint64_t xc; - if (!(cpu_vendor == CPUVENDOR_INTEL && - CPUID_TO_BASEFAMILY(ci->ci_signature) == 15)) { + if (tprof_intel_ident() == TPROF_IDENT_NONE) { return ENOTSUP; } - KASSERT(tprof_pmi_nmi_handle == NULL); - tprof_pmi_nmi_handle = nmi_establish(tprof_pmi_nmi, NULL); + KASSERT(intel_nmi_handle == NULL); + intel_nmi_handle = nmi_establish(tprof_intel_nmi, NULL); counter_reset_val = - 
counter_val + 1; - xc = xc_broadcast(0, tprof_pmi_start_cpu, NULL, NULL); - xc_wait(xc); + memcpy(&intel_param, param, sizeof(*param)); - KASSERT(tprof_cookie == NULL); - tprof_cookie = cookie; + xc = xc_broadcast(0, tprof_intel_start_cpu, NULL, NULL); + xc_wait(xc); return 0; } static void -tprof_pmi_stop(tprof_backend_cookie_t *cookie) +tprof_intel_stop(const tprof_param_t *param) { uint64_t xc; - xc = xc_broadcast(0, tprof_pmi_stop_cpu, NULL, NULL); + xc = xc_broadcast(0, tprof_intel_stop_cpu, NULL, NULL); xc_wait(xc); - KASSERT(tprof_pmi_nmi_handle != NULL); - KASSERT(tprof_cookie == cookie); - nmi_disestablish(tprof_pmi_nmi_handle); - tprof_pmi_nmi_handle = NULL; - tprof_cookie = NULL; + KASSERT(intel_nmi_handle != NULL); + nmi_disestablish(intel_nmi_handle); + intel_nmi_handle = NULL; } -static const tprof_backend_ops_t tprof_pmi_ops = { - .tbo_estimate_freq = tprof_pmi_estimate_freq, - .tbo_start = tprof_pmi_start, - .tbo_stop = tprof_pmi_stop, +static const tprof_backend_ops_t tprof_intel_ops = { + .tbo_estimate_freq = tprof_intel_estimate_freq, + .tbo_ident = tprof_intel_ident, + .tbo_start = tprof_intel_start, + .tbo_stop = tprof_intel_stop, }; MODULE(MODULE_CLASS_DRIVER, tprof_pmi, "tprof"); @@ -273,7 +257,7 @@ tprof_pmi_modcmd(modcmd_t cmd, void *arg switch (cmd) { case MODULE_CMD_INIT: - return tprof_backend_register("tprof_pmi", &tprof_pmi_ops, + return tprof_backend_register("tprof_pmi", &tprof_intel_ops, TPROF_BACKEND_VERSION); case MODULE_CMD_FINI: Index: src/sys/dev/tprof/tprof.c diff -u src/sys/dev/tprof/tprof.c:1.13 src/sys/dev/tprof/tprof.c:1.14 --- src/sys/dev/tprof/tprof.c:1.13 Thu Aug 20 14:40:18 2015 +++ src/sys/dev/tprof/tprof.c Fri Jul 13 07:56:29 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: tprof.c,v 1.13 2015/08/20 14:40:18 christos Exp $ */ +/* $NetBSD: tprof.c,v 1.14 2018/07/13 07:56:29 maxv Exp $ */ /*- * Copyright (c)2008,2009,2010 YAMAMOTO Takashi, @@ -27,7 +27,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.13 
2015/08/20 14:40:18 christos Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.14 2018/07/13 07:56:29 maxv Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -247,8 +247,22 @@ tprof_stop1(void) workqueue_destroy(tprof_wq); } +static void +tprof_getinfo(struct tprof_info *info) +{ + tprof_backend_t *tb; + + KASSERT(mutex_owned(&tprof_startstop_lock)); + + memset(info, 0, sizeof(*info)); + info->ti_version = TPROF_VERSION; + if ((tb = tprof_backend) != NULL) { + info->ti_ident = tb->tb_ops->tbo_ident(); + } +} + static int -tprof_start(const struct tprof_param *param) +tprof_start(const tprof_param_t *param) { CPU_INFO_ITERATOR cii; struct cpu_info *ci; @@ -296,7 +310,7 @@ tprof_start(const struct tprof_param *pa callout_setfunc(&c->c_callout, tprof_kick, ci); } - error = tb->tb_ops->tbo_start(NULL); + error = tb->tb_ops->tbo_start(param); if (error != 0) { KASSERT(tb->tb_usecount > 0); tb->tb_usecount--; @@ -404,7 +418,7 @@ tprof_backend_lookup(const char *name) */ void -tprof_sample(tprof_backend_cookie_t *cookie, const tprof_frame_info_t *tfi) +tprof_sample(void *unused, const tprof_frame_info_t *tfi) { tprof_cpu_t * const c = tprof_curcpu(); tprof_buf_t * const buf = c->c_buf; @@ -608,14 +622,16 @@ tprof_read(dev_t dev, struct uio *uio, i static int tprof_ioctl(dev_t dev, u_long cmd, void *data, int flags, struct lwp *l) { - const struct tprof_param *param; + const tprof_param_t *param; int error = 0; KASSERT(minor(dev) == 0); switch (cmd) { - case TPROF_IOC_GETVERSION: - *(int *)data = TPROF_VERSION; + case TPROF_IOC_GETINFO: + mutex_enter(&tprof_startstop_lock); + tprof_getinfo(data); + mutex_exit(&tprof_startstop_lock); break; case TPROF_IOC_START: param = data; Index: src/sys/dev/tprof/tprof.h diff -u src/sys/dev/tprof/tprof.h:1.5 src/sys/dev/tprof/tprof.h:1.6 --- src/sys/dev/tprof/tprof.h:1.5 Sat Feb 5 14:04:40 2011 +++ src/sys/dev/tprof/tprof.h Fri Jul 13 07:56:29 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: tprof.h,v 1.5 2011/02/05 14:04:40 yamt Exp $ */ +/* 
$NetBSD: tprof.h,v 1.6 2018/07/13 07:56:29 maxv Exp $ */ /*- * Copyright (c)2008,2009,2010 YAMAMOTO Takashi, @@ -37,12 +37,11 @@ #include <dev/tprof/tprof_types.h> -typedef struct tprof_backend_cookie tprof_backend_cookie_t; - typedef struct tprof_backend_ops { uint64_t (*tbo_estimate_freq)(void); /* samples per second */ - int (*tbo_start)(tprof_backend_cookie_t *); - void (*tbo_stop)(tprof_backend_cookie_t *); + uint32_t (*tbo_ident)(void); + int (*tbo_start)(const tprof_param_t *); + void (*tbo_stop)(const tprof_param_t *); } tprof_backend_ops_t; #define TPROF_BACKEND_VERSION 3 @@ -54,6 +53,6 @@ typedef struct { bool tfi_inkernel; /* if tfi_pc is in the kernel address space */ } tprof_frame_info_t; -void tprof_sample(tprof_backend_cookie_t *, const tprof_frame_info_t *); +void tprof_sample(void *, const tprof_frame_info_t *); #endif /* _DEV_TPROF_TPROF_H_ */ Index: src/sys/dev/tprof/tprof_ioctl.h diff -u src/sys/dev/tprof/tprof_ioctl.h:1.3 src/sys/dev/tprof/tprof_ioctl.h:1.4 --- src/sys/dev/tprof/tprof_ioctl.h:1.3 Thu Apr 14 16:23:59 2011 +++ src/sys/dev/tprof/tprof_ioctl.h Fri Jul 13 07:56:29 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: tprof_ioctl.h,v 1.3 2011/04/14 16:23:59 yamt Exp $ */ +/* $NetBSD: tprof_ioctl.h,v 1.4 2018/07/13 07:56:29 maxv Exp $ */ /*- * Copyright (c)2008,2010 YAMAMOTO Takashi, @@ -37,14 +37,15 @@ #include <dev/tprof/tprof_types.h> -#define TPROF_VERSION 3 /* kernel-userland ABI version */ +#define TPROF_VERSION 4 /* kernel-userland ABI version */ -#define TPROF_IOC_GETVERSION _IOR('T', 1, int) - -struct tprof_param { - int dummy; +struct tprof_info { + uint32_t ti_version; + uint32_t ti_ident; }; -#define TPROF_IOC_START _IOW('T', 2, struct tprof_param) +#define TPROF_IOC_GETINFO _IOR('T', 1, struct tprof_info) + +#define TPROF_IOC_START _IOW('T', 2, tprof_param_t) #define TPROF_IOC_STOP _IO('T', 3) Index: src/sys/dev/tprof/tprof_types.h diff -u src/sys/dev/tprof/tprof_types.h:1.2 src/sys/dev/tprof/tprof_types.h:1.3 --- 
src/sys/dev/tprof/tprof_types.h:1.2 Thu Apr 14 16:23:59 2011 +++ src/sys/dev/tprof/tprof_types.h Fri Jul 13 07:56:29 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: tprof_types.h,v 1.2 2011/04/14 16:23:59 yamt Exp $ */ +/* $NetBSD: tprof_types.h,v 1.3 2018/07/13 07:56:29 maxv Exp $ */ /*- * Copyright (c)2010,2011 YAMAMOTO Takashi, @@ -30,14 +30,14 @@ #define _DEV_TPROF_TPROF_TYPES_H_ /* - * definitions used by both of kernel and userland + * definitions used by both kernel and userland */ #if defined(_KERNEL) #include <sys/types.h> -#else /* defined(_KERNEL) */ +#else #include <stdint.h> -#endif /* defined(_KERNEL) */ +#endif typedef struct { uint32_t s_pid; /* process id */ @@ -47,10 +47,22 @@ typedef struct { uintptr_t s_pc; /* program counter */ } tprof_sample_t; -/* - * s_flags - */ +typedef struct tprof_param { + uint64_t p_event; /* event class */ + uint64_t p_unit; /* unit within the event class */ + uint64_t p_flags; +} tprof_param_t; +/* s_flags */ #define TPROF_SAMPLE_INKERNEL 1 /* s_pc is in kernel address space */ +/* p_flags */ +#define TPROF_PARAM_KERN 0x01 +#define TPROF_PARAM_USER 0x02 + +/* ti_ident */ +#define TPROF_IDENT_NONE 0x00 +#define TPROF_IDENT_INTEL_GENERIC 0x01 +#define TPROF_IDENT_AMD_GENERIC 0x02 + #endif /* _DEV_TPROF_TPROF_TYPES_H_ */ Index: src/usr.sbin/tprof/Makefile diff -u src/usr.sbin/tprof/Makefile:1.3 src/usr.sbin/tprof/Makefile:1.4 --- src/usr.sbin/tprof/Makefile:1.3 Sat Nov 26 05:02:44 2011 +++ src/usr.sbin/tprof/Makefile Fri Jul 13 07:56:29 2018 @@ -1,7 +1,16 @@ -# $NetBSD: Makefile,v 1.3 2011/11/26 05:02:44 yamt Exp $ +# $NetBSD: Makefile,v 1.4 2018/07/13 07:56:29 maxv Exp $ + +.PATH: ${.CURDIR}/arch PROG= tprof MAN= tprof.8 +SRCS= tprof.c + +.if ${MACHINE_ARCH} == "i386" || ${MACHINE_ARCH} == "x86_64" +SRCS+= tprof_x86.c +.else +SRCS+= tprof_noarch.c +.endif CPPFLAGS+= -I${NETBSDSRCDIR}/sys/ Index: src/usr.sbin/tprof/tprof.8 diff -u src/usr.sbin/tprof/tprof.8:1.3 src/usr.sbin/tprof/tprof.8:1.4 --- src/usr.sbin/tprof/tprof.8:1.3 Fri Dec 
9 15:26:48 2011 +++ src/usr.sbin/tprof/tprof.8 Fri Jul 13 07:56:29 2018 @@ -1,4 +1,4 @@ -.\" $NetBSD: tprof.8,v 1.3 2011/12/09 15:26:48 yamt Exp $ +.\" $NetBSD: tprof.8,v 1.4 2018/07/13 07:56:29 maxv Exp $ .\" .\" Copyright (c)2011 YAMAMOTO Takashi, .\" All rights reserved. @@ -24,21 +24,19 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.\" ------------------------------------------------------------ -.Dd November 26, 2011 +.Dd July 13, 2018 .Dt TPROF 8 .Os -.\" ------------------------------------------------------------ .Sh NAME .Nm tprof .Nd record tprof profiling samples -.\" ------------------------------------------------------------ .Sh SYNOPSIS .Nm +.Op Fl l +.Op Fl e Ar name:option .Op Fl c .Op Fl o Ar file .Ar command ... -.\" ------------------------------------------------------------ .Sh DESCRIPTION The .Nm @@ -58,6 +56,18 @@ The .Nm utility accepts the following options. .Bl -tag -width hogehoge +.It Fl l +Display a list of performance counter events available on the system. +.It Fl e Ar name:option +.Ar name +specifies the name of the event to count; it must be taken from the list of +available events. +.Ar option +specifies the source of the event; it must be a combination of +.Ar u +(userland) and +.Ar k +(kernel). .It Fl o Ar file Write the collected samples to the file named .Ar file . @@ -67,14 +77,12 @@ The default is Write the collected samples to the standard output. Note that the output is a binary stream. .El -.\" ------------------------------------------------------------ .Sh EXAMPLES The following command profiles the system during 1 second and shows -the top-10 kernel functions which likely consumed CPU cycles. +the top-10 kernel functions which likely caused LLC misses. 
.Bd -literal - tprof -c sleep 1 2>/dev/null | tpfmt -skCLP | head -10 + tprof -e llc-misses:k -c sleep 1 2>/dev/null | tpfmt -skCLP | head -10 .Ed -.\" ------------------------------------------------------------ .Sh DIAGNOSTICS The .Nm @@ -97,23 +105,17 @@ exceeds the limit. The number of samples dropped because the buffers containing the samples were dropped. .El -.\" ------------------------------------------------------------ -.\".Sh HISTORY -.\"The -.\".Nm -.\"utility first appeared in -.\".Nx XXX . -.\" ------------------------------------------------------------ .Sh SEE ALSO .Xr tpfmt 1 , .Xr tprof 4 -.\" ------------------------------------------------------------ .Sh AUTHORS The .Nm utility is written by .An YAMAMOTO Takashi . -.\" ------------------------------------------------------------ +It was revamped by +.An Maxime Villard +in 2018. .Sh CAVEATS The contents and representation of recorded samples are undocumented and will likely be changed for future releases of Index: src/usr.sbin/tprof/tprof.c diff -u src/usr.sbin/tprof/tprof.c:1.5 src/usr.sbin/tprof/tprof.c:1.6 --- src/usr.sbin/tprof/tprof.c:1.5 Tue Jan 10 23:39:33 2012 +++ src/usr.sbin/tprof/tprof.c Fri Jul 13 07:56:29 2018 @@ -1,6 +1,35 @@ -/* $NetBSD: tprof.c,v 1.5 2012/01/10 23:39:33 joerg Exp $ */ +/* $NetBSD: tprof.c,v 1.6 2018/07/13 07:56:29 maxv Exp $ */ -/*- +/* + * Copyright (c) 2018 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Maxime Villard. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* * Copyright (c)2008 YAMAMOTO Takashi, * All rights reserved. * @@ -28,7 +57,7 @@ #include <sys/cdefs.h> #ifndef lint -__RCSID("$NetBSD: tprof.c,v 1.5 2012/01/10 23:39:33 joerg Exp $"); +__RCSID("$NetBSD: tprof.c,v 1.6 2018/07/13 07:56:29 maxv Exp $"); #endif /* not lint */ #include <sys/ioctl.h> @@ -47,6 +76,7 @@ __RCSID("$NetBSD: tprof.c,v 1.5 2012/01/ #include <stdlib.h> #include <string.h> #include <unistd.h> +#include "tprof.h" #define _PATH_TPROF "/dev/tprof" @@ -59,6 +89,10 @@ usage(void) fprintf(stderr, "%s [options] command ...\n", getprogname()); fprintf(stderr, "\n"); + fprintf(stderr, "-e name:{u}{k}\t" + "the event to count.\n"); + fprintf(stderr, "-l\t\t" + "list the events.\n"); fprintf(stderr, "-o filename\t" "output to the file. 
[default: -o tprof.out]\n"); fprintf(stderr, "-c\t\t" @@ -102,6 +136,7 @@ int main(int argc, char *argv[]) { struct tprof_param param; + struct tprof_info info; struct tprof_stat ts; const char *outfile = "tprof.out"; bool cflag = false; @@ -110,16 +145,49 @@ main(int argc, char *argv[]) int error; int ret; int ch; - int version; + char *tokens[2]; + + memset(&param, 0, sizeof(param)); + + devfd = open(_PATH_TPROF, O_RDWR); + if (devfd == -1) { + err(EXIT_FAILURE, "%s", _PATH_TPROF); + } - while ((ch = getopt(argc, argv, "co:")) != -1) { + ret = ioctl(devfd, TPROF_IOC_GETINFO, &info); + if (ret == -1) { + err(EXIT_FAILURE, "TPROF_IOC_GETINFO"); + } + if (info.ti_version != TPROF_VERSION) { + errx(EXIT_FAILURE, "version mismatch: version=%d, expected=%d", + info.ti_version, TPROF_VERSION); + } + if (tprof_event_init(info.ti_ident) == -1) { + err(EXIT_FAILURE, "cpu not supported"); + } + + while ((ch = getopt(argc, argv, "clo:e:")) != -1) { switch (ch) { case 'c': cflag = true; break; + case 'l': + tprof_event_list(); + return 0; case 'o': outfile = optarg; break; + case 'e': + tokens[0] = strtok(optarg, ":"); + tokens[1] = strtok(NULL, ":"); + if (tokens[1] == NULL) + usage(); + tprof_event_lookup(tokens[0], &param); + if (strchr(tokens[1], 'u')) + param.p_flags |= TPROF_PARAM_USER; + if (strchr(tokens[1], 'k')) + param.p_flags |= TPROF_PARAM_KERN; + break; default: usage(); } @@ -130,6 +198,10 @@ main(int argc, char *argv[]) usage(); } + if (param.p_flags == 0) { + usage(); + } + if (cflag) { outfd = STDOUT_FILENO; } else { @@ -139,21 +211,6 @@ main(int argc, char *argv[]) } } - devfd = open(_PATH_TPROF, O_RDWR); - if (devfd == -1) { - err(EXIT_FAILURE, "%s", _PATH_TPROF); - } - - ret = ioctl(devfd, TPROF_IOC_GETVERSION, &version); - if (ret == -1) { - err(EXIT_FAILURE, "TPROF_IOC_GETVERSION"); - } - if (version != TPROF_VERSION) { - errx(EXIT_FAILURE, "version mismatch: version=%d, expected=%d", - version, TPROF_VERSION); - } - - memset(&param, 0, sizeof(param)); ret = 
ioctl(devfd, TPROF_IOC_START, &param); if (ret == -1) { err(EXIT_FAILURE, "TPROF_IOC_START"); Added files: Index: src/usr.sbin/tprof/tprof.h diff -u /dev/null src/usr.sbin/tprof/tprof.h:1.1 --- /dev/null Fri Jul 13 07:56:29 2018 +++ src/usr.sbin/tprof/tprof.h Fri Jul 13 07:56:29 2018 @@ -0,0 +1,35 @@ +/* $NetBSD: tprof.h,v 1.1 2018/07/13 07:56:29 maxv Exp $ */ + +/* + * Copyright (c) 2018 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Maxime Villard. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +int tprof_event_init(uint32_t); +void tprof_event_list(void); +void tprof_event_lookup(const char *, struct tprof_param *); + Index: src/usr.sbin/tprof/arch/tprof_noarch.c diff -u /dev/null src/usr.sbin/tprof/arch/tprof_noarch.c:1.1 --- /dev/null Fri Jul 13 07:56:29 2018 +++ src/usr.sbin/tprof/arch/tprof_noarch.c Fri Jul 13 07:56:29 2018 @@ -0,0 +1,63 @@ +/* $NetBSD: tprof_noarch.c,v 1.1 2018/07/13 07:56:29 maxv Exp $ */ + +/* + * Copyright (c) 2018 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Maxime Villard. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <err.h> +#include <machine/specialreg.h> +#include <dev/tprof/tprof_ioctl.h> +#include "../tprof.h" + +int tprof_event_init(uint32_t); +void tprof_event_list(void); +void tprof_event_lookup(const char *, struct tprof_param *); + +int +tprof_event_init(uint32_t ident) +{ + errx(EXIT_FAILURE, "architecture not supported"); +} + +void +tprof_event_list(void) +{ + errx(EXIT_FAILURE, "architecture not supported"); +} + +void +tprof_event_lookup(const char *name, struct tprof_param *param) +{ + errx(EXIT_FAILURE, "architecture not supported"); +} Index: src/usr.sbin/tprof/arch/tprof_x86.c diff -u /dev/null src/usr.sbin/tprof/arch/tprof_x86.c:1.1 --- /dev/null Fri Jul 13 07:56:29 2018 +++ src/usr.sbin/tprof/arch/tprof_x86.c Fri Jul 13 07:56:29 2018 @@ -0,0 +1,358 @@ +/* $NetBSD: tprof_x86.c,v 1.1 2018/07/13 07:56:29 maxv Exp $ */ + +/* + * Copyright (c) 2018 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Maxime Villard. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <err.h> +#include <machine/specialreg.h> +#include <dev/tprof/tprof_ioctl.h> +#include "../tprof.h" + +int tprof_event_init(uint32_t); +void tprof_event_list(void); +void tprof_event_lookup(const char *, struct tprof_param *); + +struct name_to_event { + const char *name; + uint64_t event; + uint64_t unit; + bool enabled; +}; + +struct event_table { + const char *tablename; + struct name_to_event *names; + size_t nevents; + struct event_table *next; +}; + +static struct event_table *cpuevents = NULL; + +static void x86_cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + asm volatile("cpuid" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx)); +} + +/* -------------------------------------------------------------------------- */ + +/* + * Intel Architectural Version 1. 
+ */ +static struct name_to_event intel_arch1_names[] = { + /* Event Name - Event Select - UMask */ + { "unhalted-core-cycles", 0x3C, 0x00, true }, + { "instruction-retired", 0xC0, 0x00, true }, + { "unhalted-reference-cycles", 0x3C, 0x01, true }, + { "llc-reference", 0x2E, 0x4F, true }, + { "llc-misses", 0x2E, 0x41, true }, + { "branch-instruction-retired", 0xC4, 0x00, true }, + { "branch-misses-retired", 0xC5, 0x00, true }, +}; + +static struct event_table intel_arch1 = { + .tablename = "Intel Architectural Version 1", + .names = intel_arch1_names, + .nevents = sizeof(intel_arch1_names) / + sizeof(struct name_to_event), + .next = NULL +}; + +static struct event_table * +init_intel_arch1(void) +{ + unsigned int eax, ebx, ecx, edx; + struct event_table *table; + size_t i; + + eax = 0x0A; + ebx = 0; + ecx = 0; + edx = 0; + x86_cpuid(&eax, &ebx, &ecx, &edx); + + table = &intel_arch1; + for (i = 0; i < table->nevents; i++) { + /* Disable the unsupported events. */ + if ((ebx & (i << 1)) != 0) + table->names[i].enabled = false; + } + + return table; +} + +/* + * Intel Skylake/Kabylake. TODO: there are many more events available. + */ +static struct name_to_event intel_skylake_kabylake_names[] = { + /* Event Name - Event Select - UMask */ + { "itlb-misses-causes-a-walk", 0x85, 0x01, true }, +}; + +static struct event_table intel_skylake_kabylake = { + .tablename = "Intel Skylake/Kabylake", + .names = intel_skylake_kabylake_names, + .nevents = sizeof(intel_skylake_kabylake_names) / + sizeof(struct name_to_event), + .next = NULL +}; + +static struct event_table * +init_intel_skylake_kabylake(void) +{ + return &intel_skylake_kabylake; +} + +static struct event_table * +init_intel_generic(void) +{ + unsigned int eax, ebx, ecx, edx; + struct event_table *table; + + /* + * The kernel made sure the Architectural Version 1 PMCs were + * present. + */ + table = init_intel_arch1(); + + /* + * Now query the additional (non-architectural) events. They + * depend on the CPU model. 
+ */ + eax = 0x01; + ebx = 0; + ecx = 0; + edx = 0; + x86_cpuid(&eax, &ebx, &ecx, &edx); + + switch (CPUID_TO_MODEL(eax)) { + case 0x4E: /* Skylake */ + case 0x5E: /* Skylake */ + case 0x8E: /* Kabylake */ + case 0x9E: /* Kabylake */ + table->next = init_intel_skylake_kabylake(); + break; + } + + return table; +} + +/* -------------------------------------------------------------------------- */ + +/* + * AMD Family 10h + */ +static struct name_to_event amd_f10h_names[] = { + { "seg-load-all", F10H_SEGMENT_REG_LOADS, 0x7f, true }, + { "seg-load-es", F10H_SEGMENT_REG_LOADS, 0x01, true }, + { "seg-load-cs", F10H_SEGMENT_REG_LOADS, 0x02, true }, + { "seg-load-ss", F10H_SEGMENT_REG_LOADS, 0x04, true }, + { "seg-load-ds", F10H_SEGMENT_REG_LOADS, 0x08, true }, + { "seg-load-fs", F10H_SEGMENT_REG_LOADS, 0x10, true }, + { "seg-load-gs", F10H_SEGMENT_REG_LOADS, 0x20, true }, + { "seg-load-hs", F10H_SEGMENT_REG_LOADS, 0x40, true }, + { "l1cache-access", F10H_DATA_CACHE_ACCESS, 0, true }, + { "l1cache-miss", F10H_DATA_CACHE_MISS, 0, true }, + { "l1cache-refill", F10H_DATA_CACHE_REFILL_FROM_L2, 0x1f, true }, + { "l1cache-refill-invalid", F10H_DATA_CACHE_REFILL_FROM_L2, 0x01, true }, + { "l1cache-refill-shared", F10H_DATA_CACHE_REFILL_FROM_L2, 0x02, true }, + { "l1cache-refill-exclusive", F10H_DATA_CACHE_REFILL_FROM_L2, 0x04, true }, + { "l1cache-refill-owner", F10H_DATA_CACHE_REFILL_FROM_L2, 0x08, true }, + { "l1cache-refill-modified", F10H_DATA_CACHE_REFILL_FROM_L2, 0x10, true }, + { "l1cache-load", F10H_DATA_CACHE_REFILL_FROM_NORTHBRIDGE,0x1f, true }, + { "l1cache-load-invalid", F10H_DATA_CACHE_REFILL_FROM_NORTHBRIDGE,0x01, true }, + { "l1cache-load-shared", F10H_DATA_CACHE_REFILL_FROM_NORTHBRIDGE,0x02, true }, + { "l1cache-load-exclusive", F10H_DATA_CACHE_REFILL_FROM_NORTHBRIDGE,0x04, true }, + { "l1cache-load-owner", F10H_DATA_CACHE_REFILL_FROM_NORTHBRIDGE,0x08, true }, + { "l1cache-load-modified", F10H_DATA_CACHE_REFILL_FROM_NORTHBRIDGE,0x10, true }, + { 
"l1cache-writeback", F10H_CACHE_LINES_EVICTED, 0x1f, true }, + { "l1cache-writeback-invalid", F10H_CACHE_LINES_EVICTED, 0x01, true }, + { "l1cache-writeback-shared", F10H_CACHE_LINES_EVICTED, 0x02, true }, + { "l1cache-writeback-exclusive",F10H_CACHE_LINES_EVICTED, 0x04, true }, + { "l1cache-writeback-owner", F10H_CACHE_LINES_EVICTED, 0x08, true }, + { "l1cache-writeback-modified", F10H_CACHE_LINES_EVICTED, 0x10, true }, + { "l1DTLB-hit-all", F10H_L1_DTLB_HIT, 0x07, true }, + { "l1DTLB-hit-4Kpage", F10H_L1_DTLB_HIT, 0x01, true }, + { "l1DTLB-hit-2Mpage", F10H_L1_DTLB_HIT, 0x02, true }, + { "l1DTLB-hit-1Gpage", F10H_L1_DTLB_HIT, 0x04, true }, + { "l1DTLB-miss-all", F10H_L1_DTLB_MISS, 0x07, true }, + { "l1DTLB-miss-4Kpage", F10H_L1_DTLB_MISS, 0x01, true }, + { "l1DTLB-miss-2Mpage", F10H_L1_DTLB_MISS, 0x02, true }, + { "l1DTLB-miss-1Gpage", F10H_L1_DTLB_MISS, 0x04, true }, + { "l2DTLB-miss-all", F10H_L2_DTLB_MISS, 0x03, true }, + { "l2DTLB-miss-4Kpage", F10H_L2_DTLB_MISS, 0x01, true }, + { "l2DTLB-miss-2Mpage", F10H_L2_DTLB_MISS, 0x02, true }, + /* l2DTLB-miss-1Gpage: reserved on some revisions, so disabled */ + { "l1ITLB-miss", F10H_L1_ITLB_MISS, 0, true }, + { "l2ITLB-miss-all", F10H_L2_ITLB_MISS, 0x03, true }, + { "l2ITLB-miss-4Kpage", F10H_L2_ITLB_MISS, 0x01, true }, + { "l2ITLB-miss-2Mpage", F10H_L2_ITLB_MISS, 0x02, true }, + { "mem-misalign-ref", F10H_MISALIGNED_ACCESS, 0, true }, + { "ins-fetch", F10H_INSTRUCTION_CACHE_FETCH, 0, true }, + { "ins-fetch-miss", F10H_INSTRUCTION_CACHE_MISS, 0, true }, + { "ins-refill-l2", F10H_INSTRUCTION_CACHE_REFILL_FROM_L2, 0, true }, + { "ins-refill-sys", F10H_INSTRUCTION_CACHE_REFILL_FROM_SYS, 0, true }, + { "ins-fetch-stall", F10H_INSTRUCTION_FETCH_STALL, 0, true }, + { "ins-retired", F10H_RETIRED_INSTRUCTIONS, 0, true }, + { "ins-empty", F10H_DECODER_EMPTY, 0, true }, + { "ops-retired", F10H_RETIRED_UOPS, 0, true }, + { "branch-retired", F10H_RETIRED_BRANCH, 0, true }, + { "branch-miss-retired", 
F10H_RETIRED_MISPREDICTED_BRANCH,0, true }, + { "branch-taken-retired", F10H_RETIRED_TAKEN_BRANCH, 0, true }, + { "branch-taken-miss-retired", F10H_RETIRED_TAKEN_BRANCH_MISPREDICTED, 0, true }, + { "branch-far-retired", F10H_RETIRED_FAR_CONTROL_TRANSFER, 0, true }, + { "branch-resync-retired", F10H_RETIRED_BRANCH_RESYNC, 0, true }, + { "branch-near-retired", F10H_RETIRED_NEAR_RETURNS, 0, true }, + { "branch-near-miss-retired", F10H_RETIRED_NEAR_RETURNS_MISPREDICTED, 0, true }, + { "branch-indirect-miss-retired", F10H_RETIRED_INDIRECT_BRANCH_MISPREDICTED, 0, true }, + { "int-hw", F10H_INTERRUPTS_TAKEN, 0, true }, + { "int-cycles-masked", F10H_INTERRUPTS_MASKED_CYCLES, 0, true }, + { "int-cycles-masked-pending", + F10H_INTERRUPTS_MASKED_CYCLES_INTERRUPT_PENDING, 0, true }, + { "fpu-exceptions", F10H_FPU_EXCEPTIONS, 0, true }, + { "break-match0", F10H_DR0_BREAKPOINT_MATCHES, 0, true }, + { "break-match1", F10H_DR1_BREAKPOINT_MATCHES, 0, true }, + { "break-match2", F10H_DR2_BREAKPOINT_MATCHES, 0, true }, + { "break-match3", F10H_DR3_BREAKPOINT_MATCHES, 0, true }, +}; + +static struct event_table amd_f10h = { + .tablename = "AMD Family 10h", + .names = amd_f10h_names, + .nevents = sizeof(amd_f10h_names) / + sizeof(struct name_to_event), + .next = NULL +}; + +static struct event_table * +init_amd_f10h(void) +{ + return &amd_f10h; +} + +static struct event_table * +init_amd_generic(void) +{ + unsigned int eax, ebx, ecx, edx; + + eax = 0x01; + ebx = 0; + ecx = 0; + edx = 0; + x86_cpuid(&eax, &ebx, &ecx, &edx); + + switch (CPUID_TO_FAMILY(eax)) { + case 0x10: + return init_amd_f10h(); + } + + return NULL; +} + +/* -------------------------------------------------------------------------- */ + +int +tprof_event_init(uint32_t ident) +{ + switch (ident) { + case TPROF_IDENT_NONE: + return -1; + case TPROF_IDENT_INTEL_GENERIC: + cpuevents = init_intel_generic(); + break; + case TPROF_IDENT_AMD_GENERIC: + cpuevents = init_amd_generic(); + break; + } + return (cpuevents == NULL) 
? -1 : 0; +} + +static void +recursive_event_list(struct event_table *table) +{ + size_t i; + + printf("%s:\n", table->tablename); + for (i = 0; i < table->nevents; i++) { + if (!table->names[i].enabled) + continue; + printf("\t%s\n", table->names[i].name); + } + printf("\n"); + + if (table->next != NULL) { + recursive_event_list(table->next); + } +} + +void +tprof_event_list(void) +{ + recursive_event_list(cpuevents); +} + +static void +recursive_event_lookup(struct event_table *table, const char *name, + struct tprof_param *param) +{ + size_t i; + + for (i = 0; i < table->nevents; i++) { + if (!table->names[i].enabled) + continue; + if (!strcmp(table->names[i].name, name)) { + param->p_event = table->names[i].event; + param->p_unit = table->names[i].unit; + return; + } + } + + if (table->next != NULL) { + recursive_event_lookup(table->next, name, param); + } else { + errx(EXIT_FAILURE, "event '%s' unknown", name); + } +} + +void +tprof_event_lookup(const char *name, struct tprof_param *param) +{ + recursive_event_lookup(cpuevents, name, param); +}