On 05.09.2025 07:15, Penny, Zheng wrote:
>> -----Original Message-----
>> From: Jan Beulich <jbeul...@suse.com>
>> Sent: Thursday, September 4, 2025 8:04 PM
>>
>> On 04.09.2025 08:35, Penny Zheng wrote:
>>> +static void cf_check amd_cppc_write_request_msrs(void *info)
>>> +{
>>> +    const struct amd_cppc_drv_data *data = info;
>>> +
>>> +    wrmsrl(MSR_AMD_CPPC_REQ, data->req.raw);
>>> +}
>>> +
>>> +static void amd_cppc_write_request(unsigned int cpu,
>>> +                                   struct amd_cppc_drv_data *data,
>>> +                                   uint8_t min_perf, uint8_t des_perf,
>>> +                                   uint8_t max_perf, uint8_t epp)
>>> +{
>>> +    uint64_t prev = data->req.raw;
>>> +
>>> +    data->req.min_perf = min_perf;
>>> +    data->req.max_perf = max_perf;
>>> +    data->req.des_perf = des_perf;
>>> +    data->req.epp = epp;
>>> +
>>> +    if ( prev == data->req.raw )
>>> +        return;
>>> +
>>> +    on_selected_cpus(cpumask_of(cpu), amd_cppc_write_request_msrs, data, 1);
>>
>> With "cpu" coming from ...
>>
>>> +}
>>> +
>>> +static int cf_check amd_cppc_cpufreq_target(struct cpufreq_policy *policy,
>>> +                                            unsigned int target_freq,
>>> +                                            unsigned int relation)
>>> +{
>>> +    struct amd_cppc_drv_data *data = policy->u.amd_cppc;
>>> +    uint8_t des_perf;
>>> +    int res;
>>> +
>>> +    if ( unlikely(!target_freq) )
>>> +        return 0;
>>> +
>>> +    res = amd_cppc_khz_to_perf(data, target_freq, &des_perf);
>>> +    if ( res )
>>> +        return res;
>>> +
>>> +    /*
>>> +     * Having a performance level lower than the lowest nonlinear
>>> +     * performance level, i.e. lowest_perf <= perf <= lowest_nonlinear_perf,
>>> +     * may actually cause an efficiency penalty. So when deciding the
>>> +     * min_perf value, we prefer lowest nonlinear performance over lowest
>>> +     * performance.
>>> +     */
>>> +    amd_cppc_write_request(policy->cpu, data, data->caps.lowest_nonlinear_perf,
>>
>> ... here, how can this work when this particular CPU isn't online anymore?
> 
> Once any processor in the domain goes offline, the governor is stopped, so
> .target() can no longer be invoked:
> ```
>         if ( hw_all || cpumask_weight(cpufreq_dom->map) == domain_info->num_processors )
>                 __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
> ```

I can't bring the code in line with what you say.

>>> +                           des_perf, data->caps.highest_perf,
>>> +                           /* Pre-defined BIOS value for passive mode */
>>> +                           per_cpu(epp_init, policy->cpu));
>>> +    return 0;
>>> +}
>>> +
>>> +static void cf_check amd_cppc_init_msrs(void *info)
>>> +{
>>> +    struct cpufreq_policy *policy = info;
>>> +    struct amd_cppc_drv_data *data = policy->u.amd_cppc;
>>> +    uint64_t val;
>>> +    unsigned int min_freq = 0, nominal_freq = 0, max_freq;
>>> +
>>> +    /* Package level MSR */
>>> +    rdmsrl(MSR_AMD_CPPC_ENABLE, val);
>>
>> Here you clarify the scope, yet what about ...
>>
>>> +    /*
>>> +     * Only when the Enable bit is on will the hardware calculate the
>>> +     * processor's performance capabilities and initialize the
>>> +     * performance level fields in the CPPC capability registers.
>>> +     */
>>> +    if ( !(val & AMD_CPPC_ENABLE) )
>>> +    {
>>> +        val |= AMD_CPPC_ENABLE;
>>> +        wrmsrl(MSR_AMD_CPPC_ENABLE, val);
>>> +    }
>>> +
>>> +    rdmsrl(MSR_AMD_CPPC_CAP1, data->caps.raw);
>>
>> ... this and ...
>>
>>> +
>>> +    /* Store pre-defined BIOS value for passive mode */
>>> +    rdmsrl(MSR_AMD_CPPC_REQ, val);
>>
>> ... this?
> 
> They are all per-thread MSRs. I'll add descriptions.

If they're per-thread, coordination will be yet more difficult if any domain
had more than one thread in it. So question again: Is it perhaps disallowed
by the spec for there to be any "domain" covering more than a single thread?

>>> --- a/xen/include/acpi/cpufreq/cpufreq.h
>>> +++ b/xen/include/acpi/cpufreq/cpufreq.h
>>> @@ -63,6 +63,7 @@ struct perf_limits {  };
>>>
>>>  struct hwp_drv_data;
>>> +struct amd_cppc_drv_data;
>>>  struct cpufreq_policy {
>>>      cpumask_var_t       cpus;          /* affected CPUs */
>>>      unsigned int        shared_type;   /* ANY or ALL affected CPUs
>>> @@ -85,6 +86,9 @@ struct cpufreq_policy {
>>>      union {
>>>  #ifdef CONFIG_INTEL
>>>          struct hwp_drv_data *hwp; /* Driver data for Intel HWP */
>>> +#endif
>>> +#ifdef CONFIG_AMD
>>> +        struct amd_cppc_drv_data *amd_cppc; /* Driver data for AMD CPPC */
>>>  #endif
>>>      } u;
>>>  };
>>
>> Same comments here as for the HWP patch.
> 
> May I ask why structure over pointer here?

Efficiency: fewer allocations, and one less level of indirection. For relatively
small structures you also want to consider the storage overhead of the extra
pointer.

Jan
