On Sun, Apr 24, 2022 at 07:06:19PM +0200, Claudio Jeker wrote:
> On Ryzen CPUs each CCD has a temp sensor. If the CPU has CCDs (which
> excludes Zen APU CPUs) this should show additional temp info. This is
> based on info from the Linux k10temp driver.
> 
> Additionally use the MSRs defined in "Open-Source Register Reference For
> AMD Family 17h Processors" to measure the CPU core frequency.
> That should be the actuall speed of the CPU core during the measuring
> interval.
> 
> On my T14g2 the output is now for example:
> ksmn0.temp0                       63.88 degC          Tctl
> ksmn0.frequency0            3553141515.00 Hz          CPU0
> ksmn0.frequency1            3549080315.00 Hz          CPU2
> ksmn0.frequency2            3552369937.00 Hz          CPU4
> ksmn0.frequency3            3546055048.00 Hz          CPU6
> ksmn0.frequency4            3546854449.00 Hz          CPU8
> ksmn0.frequency5            3543869698.00 Hz          CPU10
> ksmn0.frequency6            3542551127.00 Hz          CPU12
> ksmn0.frequency7            4441623647.00 Hz          CPU14
> 
> It is intresting to watch turbo kick in and how temp causes the CPU to
> throttle.
> 
> I only tested this on systems with APUs so I could not thest the Tccd temp
> reporting.
> -- 
> :wq Claudio

Awesome! :-)

I can see this adding a bunch of extra frequency sensors on higher-end
Ryzen/Threadripper/EPYC CPUs, considering it's displayed in "Hz" that
might be a bit overwhelming to look at.. but that's just a cosmetic
nit for systat.

I'll test on some of my AMD machines later, if someone doesn't beat me
to it.

Some comments below..

> Index: ksmn.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/ksmn.c,v
> retrieving revision 1.6
> diff -u -p -r1.6 ksmn.c
> --- ksmn.c    11 Mar 2022 18:00:50 -0000      1.6
> +++ ksmn.c    24 Apr 2022 16:47:08 -0000
> @@ -20,6 +20,7 @@
>  #include <sys/systm.h>
>  #include <sys/device.h>
>  #include <sys/sensors.h>
> +#include <sys/timeout.h>
>  
>  #include <machine/bus.h>
>  
> @@ -42,6 +43,7 @@
>    *     [31:21]  Current reported temperature.
>    */
>  #define SMU_17H_THM  0x59800
> +#define SMU_17H_CCD_THM(o, x)        (SMU_17H_THM + (o) + ((x) * 4))
>  #define GET_CURTMP(r)        (((r) >> 21) & 0x7ff)
>  
>  /*
> @@ -50,6 +52,8 @@
>   */
>  #define CURTMP_17H_RANGE_SEL (1 << 19)
>  #define CURTMP_17H_RANGE_ADJUST      490
> +#define CURTMP_CCD_VALID     (1 << 11)
> +#define CURTMP_CCD_MASK              0x7ff
>  
>  /*
>   * Undocumented tCTL offsets gleamed from Linux k10temp driver.
> @@ -75,13 +79,24 @@ struct ksmn_softc {
>       pcitag_t                sc_pcitag;
>  
>       int                     sc_tctl_offset;
> +     unsigned int            sc_ccd_valid;           /* available Tccds */
> +     unsigned int            sc_ccd_offset;
> +     int                     sc_hz_count;
>  
> -     struct ksensor          sc_sensor;
>       struct ksensordev       sc_sensordev;
> +     struct ksensor          sc_sensor;              /* Tctl */
> +     struct ksensor          sc_ccd_sensor[12];      /* Tccd */
> +
> +     struct timeout          sc_hz_timeout;
> +     struct ksensor          *sc_hz_sensor;
> +     struct cpu_info         **sc_hz_cpu_info;
>  };
>  
>  int  ksmn_match(struct device *, void *, void *);
>  void ksmn_attach(struct device *, struct device *, void *);
> +uint32_t     ksmn_read_reg(struct ksmn_softc *, uint32_t);
> +void ksmn_ccd_attach(struct ksmn_softc *, int);
> +void ksmn_hz_task(void *);
>  void ksmn_refresh(void *);
>  
>  const struct cfattach ksmn_ca = {
> @@ -113,7 +128,12 @@ ksmn_attach(struct device *parent, struc
>       struct ksmn_softc       *sc = (struct ksmn_softc *)self;
>       struct pci_attach_args  *pa = aux;
>       struct curtmp_offset    *p;
> -     extern char             cpu_model[];
> +     CPU_INFO_ITERATOR        cii;
> +     struct cpu_info         *ci = curcpu();
> +     struct ksensor          *s;
> +     extern char              cpu_model[];
> +     int                      i;
> +
>  
>       sc->sc_pc = pa->pa_pc;
>       sc->sc_pcitag = pa->pa_tag;
> @@ -122,6 +142,7 @@ ksmn_attach(struct device *parent, struc
>           sizeof(sc->sc_sensordev.xname));
>  
>       sc->sc_sensor.type = SENSOR_TEMP;
> +     snprintf(sc->sc_sensor.desc, sizeof(sc->sc_sensor.desc), "Tctl");
>       sensor_attach(&sc->sc_sensordev, &sc->sc_sensor);
>  
>       /*
> @@ -136,6 +157,80 @@ ksmn_attach(struct device *parent, struc
>                       sc->sc_tctl_offset = p->tctl_offset;
>       }
>  
> +     sc->sc_ccd_offset = 0x154;
> +
> +     if (ci->ci_family == 0x17 || ci->ci_family == 0x18) {
> +             switch (ci->ci_model) {
> +             case 0x1:       /* Zen */
> +             case 0x8:       /* Zen+ */
> +             case 0x11:      /* Zen APU */
> +             case 0x18:      /* Zen+ APU */
> +                     ksmn_ccd_attach(sc, 4);
> +                     break;
> +             case 0x31:      /* Zen2 Threadripper */
> +             case 0x60:      /* Renoir */
> +             case 0x68:      /* Lucienne */
> +             case 0x71:      /* Zen2 */
> +                     ksmn_ccd_attach(sc, 8);
> +                     break;
> +             }
> +     } else if (ci->ci_family == 0x19) {
> +             uint32_t m = ci->ci_model;
> +
> +             if ((m >= 0x40 && m <= 0x4f) ||
> +                 (m >= 0x10 && m <= 0x1f) ||
> +                 (m >= 0xa0 && m <= 0xaf))
> +                     sc->sc_ccd_offset = 0x300;
> +
> +             if ((m >= 0x10 && m <= 0x1f) ||
> +                 (m >= 0xa0 && m <= 0xaf))
> +                     ksmn_ccd_attach(sc, 12);
> +             else
> +                     ksmn_ccd_attach(sc, 8);
> +     }
> +
> +     CPU_INFO_FOREACH(cii, ci) {
> +             if (ci->ci_smt_id != 0)
> +                     continue;
> +             sc->sc_hz_count++;
> +     }
> +     sc->sc_hz_sensor = mallocarray(sc->sc_hz_count,
> +          sizeof(*sc->sc_hz_sensor), M_DEVBUF, M_WAITOK | M_ZERO);
> +     sc->sc_hz_cpu_info = mallocarray(sc->sc_hz_count,
> +          sizeof(*sc->sc_hz_cpu_info), M_DEVBUF, M_WAITOK | M_ZERO);
> +
> +     i = 0;
> +     CPU_INFO_FOREACH(cii, ci) {
> +             if (ci->ci_smt_id != 0)
> +                     continue;
> +             sc->sc_hz_cpu_info[i] = ci;
> +             i++;
> +     }
> +     /* sort list of CPUs */
> +     for (i = 1; i < sc->sc_hz_count; i++) {
> +             int j;
> +             for (j = 0; j < sc->sc_hz_count - i; j++) {
> +                     if (CPU_INFO_UNIT(sc->sc_hz_cpu_info[j]) >
> +                         CPU_INFO_UNIT(sc->sc_hz_cpu_info[j + 1])) {
> +                             ci = sc->sc_hz_cpu_info[j];
> +                             sc->sc_hz_cpu_info[j] =
> +                                 sc->sc_hz_cpu_info[j + 1];
> +                             sc->sc_hz_cpu_info[j + 1] = ci;
> +                     }
> +             }
> +     }
> +
> +     for (i = 0; i < sc->sc_hz_count; i++) {
> +             ci = sc->sc_hz_cpu_info[i];
> +             s = &sc->sc_hz_sensor[i];
> +             s->type = SENSOR_FREQ;
> +             snprintf(s->desc, sizeof(s->desc), "CPU%d", CPU_INFO_UNIT(ci));
> +             sensor_attach(&sc->sc_sensordev, &sc->sc_hz_sensor[i]);
> +     }
> +
> +     timeout_set_proc(&sc->sc_hz_timeout, ksmn_hz_task, sc);
> +     timeout_add_sec(&sc->sc_hz_timeout, 1);
> +
>       if (sensor_task_register(sc, ksmn_refresh, 5) == NULL) {
>               printf(": unable to register update task\n");
>               return;
> @@ -146,18 +241,78 @@ ksmn_attach(struct device *parent, struc
>       printf("\n");
>  }
>  
> +uint32_t
> +ksmn_read_reg(struct ksmn_softc *sc, uint32_t addr)
> +{
> +     uint32_t reg;
> +     int s;
> +
> +     s = splhigh();
> +     pci_conf_write(sc->sc_pc, sc->sc_pcitag, SMN_17H_ADDR_R, addr);
> +     reg = pci_conf_read(sc->sc_pc, sc->sc_pcitag, SMN_17H_DATA_R);
> +     splx(s);
> +     return reg;
> +}
> +
> +void
> +ksmn_ccd_attach(struct ksmn_softc *sc, int nccd)
> +{
> +     struct ksensor *s;
> +     uint32_t reg;
> +     int i;
> +
> +     KASSERT(nccd > 0 && nccd < nitems(sc->sc_ccd_sensor));
> +
> +     for (i = 0; i < nccd; i++) {
> +             reg = ksmn_read_reg(sc, SMU_17H_CCD_THM(sc->sc_ccd_offset, i));
> +             if (reg & CURTMP_CCD_VALID) {
> +                     sc->sc_ccd_valid |= (1 << i);
> +                     s = &sc->sc_ccd_sensor[i];
> +                     s->type = SENSOR_TEMP;
> +                     snprintf(s->desc, sizeof(s->desc), "Tccd%d", i);
> +                     sensor_attach(&sc->sc_sensordev, s);
> +             }
> +     }
> +}
> +
> +void
> +ksmn_hz_task(void *arg)
> +{
> +     extern uint64_t          tsc_frequency;
> +     struct ksmn_softc       *sc = arg;
> +     struct cpu_info         *ci;
> +     struct ksensor          *sens;
> +     uint64_t                 mperf, aperf;
> +     int                      i, s;
> +
> +     for (i = 0; i < sc->sc_hz_count; i++) {
> +             ci = sc->sc_hz_cpu_info[i];
> +             sens = &sc->sc_hz_sensor[i];
> +             if (!CPU_IS_RUNNING(ci))
> +                     continue;
> +             sched_peg_curproc(ci);
> +
> +             s = splhigh();
> +             mperf = rdmsr(0xE7);
> +             aperf = rdmsr(0xE8);
> +             wrmsr(0xE7, 0);
> +             wrmsr(0xE8, 0);

Do these MSRs have names?

> +             splx(s);
> +             sens->value = ((aperf * tsc_frequency) / mperf) * 1000000;
> +     }
> +
> +     timeout_add_sec(&sc->sc_hz_timeout, 1);
> +}
> +
>  void
>  ksmn_refresh(void *arg)
>  {
>       struct ksmn_softc       *sc = arg;
>       struct ksensor          *s = &sc->sc_sensor;
>       pcireg_t                reg;
> -     int                     raw, offset = 0;
> -
> -     pci_conf_write(sc->sc_pc, sc->sc_pcitag, SMN_17H_ADDR_R,
> -         SMU_17H_THM);
> -     reg = pci_conf_read(sc->sc_pc, sc->sc_pcitag, SMN_17H_DATA_R);
> +     int                     i, raw, offset = 0;
>  
> +     reg = ksmn_read_reg(sc, SMU_17H_THM);
>       raw = GET_CURTMP(reg);
>       if ((reg & CURTMP_17H_RANGE_SEL) != 0)
>               offset -= CURTMP_17H_RANGE_ADJUST;
> @@ -166,5 +321,16 @@ ksmn_refresh(void *arg)
>       offset *= 100000;
>  
>       /* convert raw (in steps of 0.125C) to uC, add offset, uC to uK. */
> -     s->value = ((raw * 125000) + offset) + 273150000;
> +     s->value = raw * 125000 + offset + 273150000;
> +
> +     offset = CURTMP_17H_RANGE_ADJUST * 100000;

Do we not need to handle the tCTL offsets for these additional CCD
sensors on older Zen/Zen+ CPUs? We compenstate for that above, which
I think you're losing here.

> +     for (i = 0; i < nitems(sc->sc_ccd_sensor); i++) {
> +             if (sc->sc_ccd_valid & (1 << i)) {
> +                     s = &sc->sc_ccd_sensor[i];
> +                     reg = ksmn_read_reg(sc,
> +                         SMU_17H_CCD_THM(sc->sc_ccd_offset, i));
> +                     s->value = (reg & CURTMP_CCD_MASK) * 125000 - offset +
> +                         273150000;
> +             }
> +     }
>  }
> 
> 

Reply via email to