On 04.09.2025 08:35, Penny Zheng wrote:
> amd-cppc is the AMD CPU performance scaling driver that introduces a
> new CPU frequency control mechanism. The new mechanism is based on
> Collaborative Processor Performance Control (CPPC), which provides
> finer-grained frequency management than legacy ACPI hardware P-states.
> Current AMD CPU platforms use the ACPI P-states driver to manage CPU
> frequency and clocks, switching between only 3 P-states, while the new
> amd-cppc driver offers a more flexible, low-latency interface for Xen
> to communicate performance hints directly to the hardware.
> 
> "amd-cppc" driver is responsible for implementing CPPC in passive mode, which
> still leverages Xen governors such as *ondemand*, *performance*, etc, to
> calculate the performance hints. In the future, we will introduce an advanced
> active mode to enable autonomous performence level selection.
> 
> The epp field (energy performance preference) only has meaning when active
> mode is enabled, which will be introduced in detail later, so in passive
> mode we read the pre-defined BIOS value for it.
> 
> Signed-off-by: Penny Zheng <penny.zh...@amd.com>
> Acked-by: Jan Beulich <jbeul...@suse.com>

With the issue I had pointed out, leading to ...

> ---
> v8 -> v9
> - embed struct amd_cppc_drv_data{} into struct cpufreq_policy{}

... this change, I think the tag would have needed to be dropped.

> +static void cf_check amd_cppc_write_request_msrs(void *info)
> +{
> +    const struct amd_cppc_drv_data *data = info;
> +
> +    wrmsrl(MSR_AMD_CPPC_REQ, data->req.raw);
> +}
> +
> +static void amd_cppc_write_request(unsigned int cpu,
> +                                   struct amd_cppc_drv_data *data,
> +                                   uint8_t min_perf, uint8_t des_perf,
> +                                   uint8_t max_perf, uint8_t epp)
> +{
> +    uint64_t prev = data->req.raw;
> +
> +    data->req.min_perf = min_perf;
> +    data->req.max_perf = max_perf;
> +    data->req.des_perf = des_perf;
> +    data->req.epp = epp;
> +
> +    if ( prev == data->req.raw )
> +        return;
> +
> +    on_selected_cpus(cpumask_of(cpu), amd_cppc_write_request_msrs, data, 1);

With "cpu" coming from ...

> +}
> +
> +static int cf_check amd_cppc_cpufreq_target(struct cpufreq_policy *policy,
> +                                            unsigned int target_freq,
> +                                            unsigned int relation)
> +{
> +    struct amd_cppc_drv_data *data = policy->u.amd_cppc;
> +    uint8_t des_perf;
> +    int res;
> +
> +    if ( unlikely(!target_freq) )
> +        return 0;
> +
> +    res = amd_cppc_khz_to_perf(data, target_freq, &des_perf);
> +    if ( res )
> +        return res;
> +
> +    /*
> +     * Having a performance level lower than the lowest nonlinear
> +     * performance level, i.e. lowest_perf <= perf <= lowest_nonlinear_perf,
> +     * may actually cause an efficiency penalty. So when deciding the min_perf
> +     * value, we prefer lowest nonlinear performance over lowest performance.
> +     */
> +    amd_cppc_write_request(policy->cpu, data, data->caps.lowest_nonlinear_perf,

... here, how can this work when this particular CPU isn't online anymore?

> +                           des_perf, data->caps.highest_perf,
> +                           /* Pre-defined BIOS value for passive mode */
> +                           per_cpu(epp_init, policy->cpu));
> +    return 0;
> +}
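
To illustrate the concern: the IPI in amd_cppc_write_request() is only safe
if the target CPU is guaranteed to still be online at that point. A sketch of
the kind of guard I have in mind, inside amd_cppc_write_request() (purely
illustrative; whether get_cpu_maps()/cpu_online() is the right mechanism and
the right place for it is a separate question):

    /*
     * Sketch only: keep CPUs from going offline while the request is
     * being sent, and skip the write if the target CPU is already gone.
     */
    if ( !get_cpu_maps() )
        return;

    if ( cpu_online(cpu) )
        on_selected_cpus(cpumask_of(cpu), amd_cppc_write_request_msrs, data, 1);

    put_cpu_maps();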
> +
> +static void cf_check amd_cppc_init_msrs(void *info)
> +{
> +    struct cpufreq_policy *policy = info;
> +    struct amd_cppc_drv_data *data = policy->u.amd_cppc;
> +    uint64_t val;
> +    unsigned int min_freq = 0, nominal_freq = 0, max_freq;
> +
> +    /* Package level MSR */
> +    rdmsrl(MSR_AMD_CPPC_ENABLE, val);

Here you clarify the scope, yet what about ...

> +    /*
> +     * Only when the Enable bit is on will the hardware calculate the
> +     * processor's performance capabilities and initialize the performance
> +     * level fields in the CPPC capability registers.
> +     */
> +    if ( !(val & AMD_CPPC_ENABLE) )
> +    {
> +        val |= AMD_CPPC_ENABLE;
> +        wrmsrl(MSR_AMD_CPPC_ENABLE, val);
> +    }
> +
> +    rdmsrl(MSR_AMD_CPPC_CAP1, data->caps.raw);

... this and ...

> +    if ( data->caps.highest_perf == 0 || data->caps.lowest_perf == 0 ||
> +         data->caps.nominal_perf == 0 || data->caps.lowest_nonlinear_perf == 0 ||
> +         data->caps.lowest_perf > data->caps.lowest_nonlinear_perf ||
> +         data->caps.lowest_nonlinear_perf > data->caps.nominal_perf ||
> +         data->caps.nominal_perf > data->caps.highest_perf )
> +    {
> +        amd_cppc_err(policy->cpu,
> +                     "Out of range values: highest(%u), lowest(%u), nominal(%u), lowest_nonlinear(%u)\n",
> +                     data->caps.highest_perf, data->caps.lowest_perf,
> +                     data->caps.nominal_perf, data->caps.lowest_nonlinear_perf);
> +        goto err;
> +    }
> +
> +    amd_process_freq(&cpu_data[policy->cpu],
> +                     NULL, NULL, &this_cpu(pxfreq_mhz));
> +
> +    data->err = amd_get_cpc_freq(data, data->cppc_data->cpc.lowest_mhz,
> +                                 data->caps.lowest_perf, &min_freq);
> +    if ( data->err )
> +        return;
> +
> +    data->err = amd_get_cpc_freq(data, data->cppc_data->cpc.nominal_mhz,
> +                                 data->caps.nominal_perf, &nominal_freq);
> +    if ( data->err )
> +        return;
> +
> +    data->err = amd_get_max_freq(data, &max_freq);
> +    if ( data->err )
> +        return;
> +
> +    if ( min_freq > nominal_freq || nominal_freq > max_freq )
> +    {
> +        amd_cppc_err(policy->cpu,
> +                     "min(%u), or max(%u), or nominal(%u) freq value is incorrect\n",
> +                     min_freq, max_freq, nominal_freq);
> +        goto err;
> +    }
> +
> +    policy->min = min_freq;
> +    policy->max = max_freq;
> +
> +    policy->cpuinfo.min_freq = min_freq;
> +    policy->cpuinfo.max_freq = max_freq;
> +    policy->cpuinfo.perf_freq = nominal_freq;
> +    /*
> +     * Set after policy->cpuinfo.perf_freq, as we are taking
> +     * APERF/MPERF average frequency as current frequency.
> +     */
> +    policy->cur = cpufreq_driver_getavg(policy->cpu, GOV_GETAVG);
> +
> +    /* Store pre-defined BIOS value for passive mode */
> +    rdmsrl(MSR_AMD_CPPC_REQ, val);

... this?
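
I.e. presumably these two reads want similar scope comments, e.g. (assuming
core scope here merely for illustration; the comment should state whichever
it actually is):

    /* Core level MSR (assumed; please state the actual scope) */
    rdmsrl(MSR_AMD_CPPC_CAP1, data->caps.raw);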

> +static int cf_check amd_cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
> +{
> +    unsigned int cpu = policy->cpu;
> +    struct amd_cppc_drv_data *data;
> +
> +    data = xvzalloc(struct amd_cppc_drv_data);
> +    if ( !data )
> +        return -ENOMEM;
> +    policy->u.amd_cppc = data;
> +
> +    data->cppc_data = &processor_pminfo[cpu]->cppc_data;
> +
> +    on_selected_cpus(cpumask_of(cpu), amd_cppc_init_msrs, policy, 1);
> +
> +    /*
> +     * The enable bit is sticky, as we need to enable it at the very
> +     * beginning, before the CPPC capability values sanity check.
> +     * If the error path is taken, not only does the amd-cppc cpufreq core
> +     * fail to initialize, but we also cannot fall back to the legacy
> +     * P-states driver, irrespective of the command line specifying a
> +     * fallback option.
> +     */
> +    if ( data->err )
> +    {
> +        amd_cppc_err(cpu, "Could not initialize cpufreq core in CPPC mode\n");
> +        amd_cppc_cpufreq_cpu_exit(policy);
> +        return data->err;

amd_cppc_cpufreq_cpu_exit() has already freed what data points to.
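
I.e. the error value needs latching before the exit hook frees the driver
data, e.g. (sketch only):

    if ( data->err )
    {
        int ret = data->err;

        amd_cppc_err(cpu, "Could not initialize cpufreq core in CPPC mode\n");
        amd_cppc_cpufreq_cpu_exit(policy);
        return ret;
    }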

> --- a/xen/include/acpi/cpufreq/cpufreq.h
> +++ b/xen/include/acpi/cpufreq/cpufreq.h
> @@ -63,6 +63,7 @@ struct perf_limits {
>  };
>  
>  struct hwp_drv_data;
> +struct amd_cppc_drv_data;
>  struct cpufreq_policy {
>      cpumask_var_t       cpus;          /* affected CPUs */
>      unsigned int        shared_type;   /* ANY or ALL affected CPUs
> @@ -85,6 +86,9 @@ struct cpufreq_policy {
>      union {
>  #ifdef CONFIG_INTEL
>          struct hwp_drv_data *hwp; /* Driver data for Intel HWP */
> +#endif
> +#ifdef CONFIG_AMD
> +        struct amd_cppc_drv_data *amd_cppc; /* Driver data for AMD CPPC */
>  #endif
>      } u;
>  };

Same comments here as for the HWP patch.

Jan
