Re: [PATCH v3] cpufreq: ondemand: handle SW coordinated CPUs

Rafael J. Wysocki Thu, 22 Nov 2012 11:12:56 -0800

On Tuesday, November 20, 2012 01:06:16 PM Fabio Baltieri wrote:
> From: Rickard Andersson <[email protected]>
> 
> This patch fixes a bug that occurred when we had load on a secondary CPU
> and the primary CPU was sleeping. Only one sampling timer was spawned
> and it was spawned as a deferred timer on the primary CPU, so when a
> secondary CPU had a change in load this was not detected by the ondemand
> governor.
> 
> This patch make sure that deferred timers are run on all CPUs in the
> case of software controlled CPUs that run on the same frequency.


While I basically don't have problems with the functionality of this,
I have some with the code organization.

> Signed-off-by: Rickard Andersson <[email protected]>
> Signed-off-by: Fabio Baltieri <[email protected]>
> ---
>  drivers/cpufreq/cpufreq_ondemand.c | 141 
> ++++++++++++++++++++++++++++++++-----
>  1 file changed, 122 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/cpufreq/cpufreq_ondemand.c 
> b/drivers/cpufreq/cpufreq_ondemand.c
> index 396322f..430f614 100644
> --- a/drivers/cpufreq/cpufreq_ondemand.c
> +++ b/drivers/cpufreq/cpufreq_ondemand.c
> @@ -93,6 +93,7 @@ struct cpu_dbs_info_s {
>        * when user is changing the governor or limits.
>        */
>       struct mutex timer_mutex;
> +     ktime_t time_stamp;
>  };
>  static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info);
>  
> @@ -285,7 +286,7 @@ static void update_sampling_rate(unsigned int new_rate)
>               policy = cpufreq_cpu_get(cpu);
>               if (!policy)
>                       continue;
> -             dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu);
> +             dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
>               cpufreq_cpu_put(policy);
>  
>               mutex_lock(&dbs_info->timer_mutex);
> @@ -305,7 +306,7 @@ static void update_sampling_rate(unsigned int new_rate)
>                       cancel_delayed_work_sync(&dbs_info->work);
>                       mutex_lock(&dbs_info->timer_mutex);
>  
> -                     schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work,
> +                     schedule_delayed_work_on(cpu, &dbs_info->work,
>                                                usecs_to_jiffies(new_rate));
>  
>               }

The above two changes don't belong to this patch.  Please send a separate patch
with them and a matching description in the changelog.

> @@ -449,6 +450,16 @@ static struct attribute_group dbs_attr_group = {
>  
>  /************************** sysfs end ************************/
>  
> +static bool dbs_sw_coordinated_cpus(struct cpu_dbs_info_s *dbs_info)
> +{
> +     struct cpufreq_policy *policy = dbs_info->cur_policy;
> +
> +     if (cpumask_weight(policy->cpus) > 1)
> +             return true;
> +     else
> +             return false;
> +}

return cpumask_weight(policy->cpus) > 1;

pretty please.

> +
>  static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq)
>  {
>       if (dbs_tuners_ins.powersave_bias)
> @@ -598,20 +609,41 @@ static void dbs_check_cpu(struct cpu_dbs_info_s 
> *this_dbs_info)
>  
>  static void do_dbs_timer(struct work_struct *work)
>  {
> +     struct delayed_work *dw = to_delayed_work(work);
>       struct cpu_dbs_info_s *dbs_info =
>               container_of(work, struct cpu_dbs_info_s, work.work);
> -     unsigned int cpu = dbs_info->cpu;
> -     int sample_type = dbs_info->sample_type;
> -
> +     int sample_type;
>       int delay;
> +     bool sample = true;
> +
> +     if (dbs_sw_coordinated_cpus(dbs_info)) {
> +             ktime_t time_now;
> +             s64 delta_us;
> +
> +             /* use leader CPU's dbs_info */
> +             dbs_info = &per_cpu(od_cpu_dbs_info, dbs_info->cpu);
> +             mutex_lock(&dbs_info->timer_mutex);
>  
> -     mutex_lock(&dbs_info->timer_mutex);
> +             time_now = ktime_get();
> +             delta_us = ktime_us_delta(time_now, dbs_info->time_stamp);
> +
> +             /* Do nothing if we recently have sampled */
> +             if (delta_us < (s64)(dbs_tuners_ins.sampling_rate / 2))
> +                     sample = false;
> +             else
> +                     dbs_info->time_stamp = time_now;
> +     } else {
> +             mutex_lock(&dbs_info->timer_mutex);
> +     }

Please don't handle locking this way.  Instead, please move the code you'll
run under the lock in both cases into a separate function (it may take "sample"
as an argument along with dbs_info) and call it between
mutex_lock() and mutex_unlock() in each block.

In addition to that, I'd move the whole block executed when
dbs_sw_coordinated_cpus(dbs_info) is true into a separate function where
dbs_info would be a local variable.  This way it wouldn't mix two distinct
cases in the same piece of code that's remarkably hard to follow.

> +
> +     sample_type = dbs_info->sample_type;
>  
>       /* Common NORMAL_SAMPLE setup */
>       dbs_info->sample_type = DBS_NORMAL_SAMPLE;
>       if (!dbs_tuners_ins.powersave_bias ||
>           sample_type == DBS_NORMAL_SAMPLE) {
> -             dbs_check_cpu(dbs_info);
> +             if (sample)
> +                     dbs_check_cpu(dbs_info);
>               if (dbs_info->freq_lo) {
>                       /* Setup timer for SUB_SAMPLE */
>                       dbs_info->sample_type = DBS_SUB_SAMPLE;
> @@ -627,32 +659,41 @@ static void do_dbs_timer(struct work_struct *work)
>                               delay -= jiffies % delay;
>               }
>       } else {
> -             __cpufreq_driver_target(dbs_info->cur_policy,
> -                     dbs_info->freq_lo, CPUFREQ_RELATION_H);
> +             if (sample)
> +                     __cpufreq_driver_target(dbs_info->cur_policy,
> +                                             dbs_info->freq_lo,
> +                                             CPUFREQ_RELATION_H);
>               delay = dbs_info->freq_lo_jiffies;
>       }
> -     schedule_delayed_work_on(cpu, &dbs_info->work, delay);
> +     schedule_delayed_work_on(smp_processor_id(), dw, delay);

We're not supposed to be using smp_processor_id() any more.
get_cpu()/put_cpu() should be used instead.

>       mutex_unlock(&dbs_info->timer_mutex);
>  }
>  
> -static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
> +static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info, int cpu)
>  {
>       /* We want all CPUs to do sampling nearly on same jiffy */
>       int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
> +     struct cpu_dbs_info_s *dbs_info_local = &per_cpu(od_cpu_dbs_info, cpu);
>  
>       if (num_online_cpus() > 1)
>               delay -= jiffies % delay;
>  
> +     cancel_delayed_work_sync(&dbs_info_local->work);
>       dbs_info->sample_type = DBS_NORMAL_SAMPLE;
> -     INIT_DEFERRABLE_WORK(&dbs_info->work, do_dbs_timer);
> -     schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay);
> +     schedule_delayed_work_on(cpu, &dbs_info_local->work, delay);
>  }
>  
> -static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
> +static inline void dbs_timer_exit(int cpu)
>  {
> +     struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
>       cancel_delayed_work_sync(&dbs_info->work);
>  }
>  
> +static void dbs_timer_exit_per_cpu(struct work_struct *dummy)
> +{
> +     dbs_timer_exit(smp_processor_id());
> +}
> +
>  /*
>   * Not all CPUs want IO time to be accounted as busy; this dependson how
>   * efficient idling at a higher frequency/voltage is.
> @@ -676,6 +717,43 @@ static int should_io_be_busy(void)
>       return 0;
>  }
>  
> +static int __cpuinit cpu_callback(struct notifier_block *nfb,
> +                               unsigned long action, void *hcpu)
> +{
> +     unsigned int cpu = (unsigned long)hcpu;
> +     struct device *cpu_dev;
> +     struct cpu_dbs_info_s *dbs_info;
> +
> +     dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
> +
> +     /* use leader CPU's dbs_info */
> +     if (dbs_sw_coordinated_cpus(dbs_info))
> +             dbs_info = &per_cpu(od_cpu_dbs_info, dbs_info->cpu);
> +
> +     cpu_dev = get_cpu_device(cpu);
> +     if (cpu_dev) {
> +             switch (action) {
> +             case CPU_ONLINE:
> +             case CPU_ONLINE_FROZEN:
> +                     dbs_timer_init(dbs_info, cpu);
> +                     break;
> +             case CPU_DOWN_PREPARE:
> +             case CPU_DOWN_PREPARE_FROZEN:
> +                     dbs_timer_exit(cpu);
> +                     break;
> +             case CPU_DOWN_FAILED:
> +             case CPU_DOWN_FAILED_FROZEN:
> +                     dbs_timer_init(dbs_info, cpu);

Why don't you merge this with the CPU_ONLINE* cases?

> +                     break;
> +             }
> +     }
> +     return NOTIFY_OK;
> +}
> +
> +static struct notifier_block __refdata ondemand_cpu_notifier = {
> +     .notifier_call = cpu_callback,
> +};
> +
>  static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
>                                  unsigned int event)
>  {
> @@ -704,9 +782,13 @@ static int cpufreq_governor_dbs(struct cpufreq_policy 
> *policy,
>                       if (dbs_tuners_ins.ignore_nice)
>                               j_dbs_info->prev_cpu_nice =
>                                               
> kcpustat_cpu(j).cpustat[CPUTIME_NICE];
> +
> +                     mutex_init(&j_dbs_info->timer_mutex);
> +                     INIT_DEFERRABLE_WORK(&j_dbs_info->work, do_dbs_timer);
> +
> +                     j_dbs_info->rate_mult = 1;
>               }
>               this_dbs_info->cpu = cpu;
> -             this_dbs_info->rate_mult = 1;
>               ondemand_powersave_bias_init_cpu(cpu);
>               /*
>                * Start the timerschedule work, when this governor
> @@ -736,21 +818,42 @@ static int cpufreq_governor_dbs(struct cpufreq_policy 
> *policy,
>               }
>               mutex_unlock(&dbs_mutex);
>  
> -             mutex_init(&this_dbs_info->timer_mutex);
> -             dbs_timer_init(this_dbs_info);
> +             /* If SW coordinated CPUs then register notifier */
> +             if (dbs_sw_coordinated_cpus(this_dbs_info)) {
> +                     register_hotcpu_notifier(&ondemand_cpu_notifier);
> +
> +                     /* Initiate timer time stamp */
> +                     this_dbs_info->time_stamp = ktime_get();
> +
> +                     for_each_cpu(j, policy->cpus) {
> +                             struct cpu_dbs_info_s *j_dbs_info;
> +
> +                             j_dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
> +                             dbs_timer_init(j_dbs_info, j);
> +                     }
> +             } else {
> +                     dbs_timer_init(this_dbs_info, cpu);
> +             }
>               break;
>  
>       case CPUFREQ_GOV_STOP:
> -             dbs_timer_exit(this_dbs_info);
> +             dbs_timer_exit(cpu);
>  
>               mutex_lock(&dbs_mutex);
>               mutex_destroy(&this_dbs_info->timer_mutex);
>               dbs_enable--;
>               mutex_unlock(&dbs_mutex);
> -             if (!dbs_enable)
> +             if (!dbs_enable) {
>                       sysfs_remove_group(cpufreq_global_kobject,
>                                          &dbs_attr_group);
>  
> +                     if (dbs_sw_coordinated_cpus(this_dbs_info)) {
> +                             /* Make sure all pending timers/works are
> +                              * stopped. */
> +                             schedule_on_each_cpu(dbs_timer_exit_per_cpu);
> +                             
> unregister_hotcpu_notifier(&ondemand_cpu_notifier);
> +                     }
> +             }
>               break;
>  
>       case CPUFREQ_GOV_LIMITS:

Thanks,
Rafael


-- 
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v3] cpufreq: ondemand: handle SW coordinated CPUs

Reply via email to