On 04.09.2025 08:35, Penny Zheng wrote:
> amd-cppc is the AMD CPU performance scaling driver that introduces a
> new CPU frequency control mechanism. The new mechanism is based on
> Collaborative Processor Performance Control (CPPC), which provides
> finer-grained frequency management than legacy ACPI hardware P-states.
> Current AMD CPU platforms use the ACPI P-states driver to manage CPU
> frequency and clocks, switching between only 3 P-states, while the new
> amd-cppc driver allows a more flexible, low-latency interface for Xen
> to communicate performance hints directly to the hardware.
>
> The "amd-cppc" driver is responsible for implementing CPPC in passive
> mode, which still leverages Xen governors such as *ondemand*,
> *performance*, etc., to calculate the performance hints. In the future,
> we will introduce an advanced active mode to enable autonomous
> performance level selection.
>
> The epp field (energy performance preference) only has meaning when
> active mode is enabled, and will be introduced later in detail; in
> passive mode we therefore read the pre-defined BIOS value for it.
>
> Signed-off-by: Penny Zheng <penny.zh...@amd.com>
> Acked-by: Jan Beulich <jbeul...@suse.com>

With the issue I had pointed out, leading to ...

> ---
> v8 -> v9
> - embed struct amd_cppc_drv_data{} into struct cpufreq_policy{}

... this change, I think the tag would have needed to be dropped.

> +static void cf_check amd_cppc_write_request_msrs(void *info)
> +{
> +    const struct amd_cppc_drv_data *data = info;
> +
> +    wrmsrl(MSR_AMD_CPPC_REQ, data->req.raw);
> +}
> +
> +static void amd_cppc_write_request(unsigned int cpu,
> +                                   struct amd_cppc_drv_data *data,
> +                                   uint8_t min_perf, uint8_t des_perf,
> +                                   uint8_t max_perf, uint8_t epp)
> +{
> +    uint64_t prev = data->req.raw;
> +
> +    data->req.min_perf = min_perf;
> +    data->req.max_perf = max_perf;
> +    data->req.des_perf = des_perf;
> +    data->req.epp = epp;
> +
> +    if ( prev == data->req.raw )
> +        return;
> +
> +    on_selected_cpus(cpumask_of(cpu), amd_cppc_write_request_msrs, data, 1);

With "cpu" coming from ...

> +}
> +
> +static int cf_check amd_cppc_cpufreq_target(struct cpufreq_policy *policy,
> +                                            unsigned int target_freq,
> +                                            unsigned int relation)
> +{
> +    struct amd_cppc_drv_data *data = policy->u.amd_cppc;
> +    uint8_t des_perf;
> +    int res;
> +
> +    if ( unlikely(!target_freq) )
> +        return 0;
> +
> +    res = amd_cppc_khz_to_perf(data, target_freq, &des_perf);
> +    if ( res )
> +        return res;
> +
> +    /*
> +     * Having a performance level lower than the lowest nonlinear
> +     * performance level, i.e. lowest_perf <= perf <= lowest_nonlinear_perf,
> +     * may actually cause an efficiency penalty. So when deciding the
> +     * min_perf value, we prefer lowest nonlinear performance over lowest
> +     * performance.
> +     */
> +    amd_cppc_write_request(policy->cpu, data, data->caps.lowest_nonlinear_perf,

... here, how can this work when this particular CPU isn't online anymore?

> +                           des_perf, data->caps.highest_perf,
> +                           /* Pre-defined BIOS value for passive mode */
> +                           per_cpu(epp_init, policy->cpu));
> +    return 0;
> +}
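If simply skipping the MSR write for a CPU that has meanwhile gone
offline would be an acceptable answer to the question above, a minimal
sketch of a guard in amd_cppc_write_request() could look like this
(untested, just to illustrate; it relies on get_cpu_maps() being Xen's
existing try-lock around CPU hotplug, so the online check can't race
with the IPI):

    if ( prev == data->req.raw )
        return;

    /* Hold the hotplug lock so @cpu can't go offline under our feet. */
    if ( !get_cpu_maps() )
        return;

    if ( cpu_online(cpu) )
        on_selected_cpus(cpumask_of(cpu), amd_cppc_write_request_msrs,
                         data, 1);

    put_cpu_maps();

Note that in this sketch data->req.raw was already updated above; a real
change would need to decide whether to undo or retry in the failure
cases (elided here).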
> +
> +static void cf_check amd_cppc_init_msrs(void *info)
> +{
> +    struct cpufreq_policy *policy = info;
> +    struct amd_cppc_drv_data *data = policy->u.amd_cppc;
> +    uint64_t val;
> +    unsigned int min_freq = 0, nominal_freq = 0, max_freq;
> +
> +    /* Package level MSR */
> +    rdmsrl(MSR_AMD_CPPC_ENABLE, val);

Here you clarify the scope, yet what about ...

> +    /*
> +     * Only when the Enable bit is on does the hardware calculate the
> +     * processor's performance capabilities and initialize the performance
> +     * level fields in the CPPC capability registers.
> +     */
> +    if ( !(val & AMD_CPPC_ENABLE) )
> +    {
> +        val |= AMD_CPPC_ENABLE;
> +        wrmsrl(MSR_AMD_CPPC_ENABLE, val);
> +    }
> +
> +    rdmsrl(MSR_AMD_CPPC_CAP1, data->caps.raw);

... this and ...

> +    if ( data->caps.highest_perf == 0 || data->caps.lowest_perf == 0 ||
> +         data->caps.nominal_perf == 0 ||
> +         data->caps.lowest_nonlinear_perf == 0 ||
> +         data->caps.lowest_perf > data->caps.lowest_nonlinear_perf ||
> +         data->caps.lowest_nonlinear_perf > data->caps.nominal_perf ||
> +         data->caps.nominal_perf > data->caps.highest_perf )
> +    {
> +        amd_cppc_err(policy->cpu,
> +                     "Out of range values: highest(%u), lowest(%u), nominal(%u), lowest_nonlinear(%u)\n",
> +                     data->caps.highest_perf, data->caps.lowest_perf,
> +                     data->caps.nominal_perf,
> +                     data->caps.lowest_nonlinear_perf);
> +        goto err;
> +    }
> +
> +    amd_process_freq(&cpu_data[policy->cpu],
> +                     NULL, NULL, &this_cpu(pxfreq_mhz));
> +
> +    data->err = amd_get_cpc_freq(data, data->cppc_data->cpc.lowest_mhz,
> +                                 data->caps.lowest_perf, &min_freq);
> +    if ( data->err )
> +        return;
> +
> +    data->err = amd_get_cpc_freq(data, data->cppc_data->cpc.nominal_mhz,
> +                                 data->caps.nominal_perf, &nominal_freq);
> +    if ( data->err )
> +        return;
> +
> +    data->err = amd_get_max_freq(data, &max_freq);
> +    if ( data->err )
> +        return;
> +
> +    if ( min_freq > nominal_freq || nominal_freq > max_freq )
> +    {
> +        amd_cppc_err(policy->cpu,
> +                     "min(%u), or max(%u), or nominal(%u) freq value is incorrect\n",
> +                     min_freq, max_freq, nominal_freq);
> +        goto err;
> +    }
> +
> +    policy->min = min_freq;
> +    policy->max = max_freq;
> +
> +    policy->cpuinfo.min_freq = min_freq;
> +    policy->cpuinfo.max_freq = max_freq;
> +    policy->cpuinfo.perf_freq = nominal_freq;
> +    /*
> +     * Set after policy->cpuinfo.perf_freq, as we are taking the
> +     * APERF/MPERF average frequency as the current frequency.
> +     */
> +    policy->cur = cpufreq_driver_getavg(policy->cpu, GOV_GETAVG);
> +
> +    /* Store pre-defined BIOS value for passive mode */
> +    rdmsrl(MSR_AMD_CPPC_REQ, val);

... this?

> +static int cf_check amd_cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
> +{
> +    unsigned int cpu = policy->cpu;
> +    struct amd_cppc_drv_data *data;
> +
> +    data = xvzalloc(struct amd_cppc_drv_data);
> +    if ( !data )
> +        return -ENOMEM;
> +    policy->u.amd_cppc = data;
> +
> +    data->cppc_data = &processor_pminfo[cpu]->cppc_data;
> +
> +    on_selected_cpus(cpumask_of(cpu), amd_cppc_init_msrs, policy, 1);
> +
> +    /*
> +     * The enable bit is sticky, as we need to enable it at the very
> +     * beginning, before the CPPC capability values sanity check.
> +     * If the error path is taken, not only does the amd-cppc cpufreq
> +     * core fail to initialize, but we also cannot fall back to the
> +     * legacy P-states driver, irrespective of the command line
> +     * specifying a fallback option.
> +     */
> +    if ( data->err )
> +    {
> +        amd_cppc_err(cpu, "Could not initialize cpufreq core in CPPC mode\n");
> +        amd_cppc_cpufreq_cpu_exit(policy);
> +        return data->err;

amd_cppc_cpufreq_cpu_exit() has already freed what data points to.
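I.e. the error code presumably wants latching into a local variable
before the exit hook tears down what data points to; a minimal sketch:

    if ( data->err )
    {
        int err = data->err; /* latch before the exit hook frees @data */

        amd_cppc_err(cpu, "Could not initialize cpufreq core in CPPC mode\n");
        amd_cppc_cpufreq_cpu_exit(policy);
        return err;
    }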
> --- a/xen/include/acpi/cpufreq/cpufreq.h
> +++ b/xen/include/acpi/cpufreq/cpufreq.h
> @@ -63,6 +63,7 @@ struct perf_limits {
>  };
>  
>  struct hwp_drv_data;
> +struct amd_cppc_drv_data;
>  struct cpufreq_policy {
>      cpumask_var_t cpus;          /* affected CPUs */
>      unsigned int shared_type;    /* ANY or ALL affected CPUs
> @@ -85,6 +86,9 @@ struct cpufreq_policy {
>      union {
>  #ifdef CONFIG_INTEL
>          struct hwp_drv_data *hwp;    /* Driver data for Intel HWP */
> +#endif
> +#ifdef CONFIG_AMD
> +        struct amd_cppc_drv_data *amd_cppc; /* Driver data for AMD CPPC */
>  #endif
>      } u;
>  };

Same comments here as for the HWP patch.

Jan