From: Rafael J. Wysocki <[email protected]>

Modify the ACPI cpufreq driver to provide a method for switching
CPU frequencies from interrupt context and update the cpufreq core
and the schedutil governor to use that method if available.

Introduce a new cpufreq driver callback, ->fast_switch, to be
invoked for frequency switching from interrupt context via a new
helper function, cpufreq_driver_fast_switch().
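
For illustration, a driver-side implementation of the new callback is
expected to look roughly like the sketch below (the foo_* names are made
up for the example; the real acpi-cpufreq implementation is in the patch
further down).  The callback returns the frequency it actually set, or
CPUFREQ_ENTRY_INVALID if it cannot carry out the switch, in which case
the hardware configuration must be left unchanged:

	static unsigned int foo_fast_switch(struct cpufreq_policy *policy,
					    unsigned int target_freq)
	{
		struct foo_data *data = policy->driver_data;
		int index;

		/* May be called from interrupt context, so nothing in
		 * here is allowed to sleep. */
		index = foo_find_closest_state(data, target_freq);
		if (index < 0)
			return CPUFREQ_ENTRY_INVALID;	/* hardware untouched */

		/* Typically a single fast register/MSR write. */
		foo_write_perf_ctl(data, index);
		return data->freq_table[index].frequency;
	}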

Modify the schedutil governor to call cpufreq_driver_fast_switch()
from its sugov_update_commit() function and to avoid queuing up the
irq_work if the fast switch is successful.
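
Condensed from the cpufreq_schedutil.c hunk below, the commit path in
the governor then becomes:

	if (cpufreq_driver_fast_switch(policy_dbs->policy, next_freq)) {
		/* The frequency has been set already; just restore the
		 * sample delay (it may have been set to 0 via sysfs). */
		gov_update_sample_delay(policy_dbs,
					policy_dbs->dbs_data->sampling_rate);
	} else {
		/* Fall back to the existing process-context path. */
		sg_policy->next_freq = next_freq;
		policy_dbs->work_in_progress = true;
		irq_work_queue(&policy_dbs->irq_work);
	}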

Implement the ->fast_switch callback in the ACPI cpufreq driver
(with limited coverage for the time being).
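
Concretely, fast switching ends up enabled in acpi-cpufreq only in the
cases left over by the checks below (both pieces are in the
acpi_cpufreq_cpu_init() hunks of the patch):

	/* Set only for Intel SpeedStep (SYSTEM_INTEL_MSR_CAPABLE) CPUs: */
	if (check_est_cpu(cpu))
		data->cpu_freq_fast_write = cpu_freq_fast_write_intel;

	/* ...and cleared again for the cases not covered yet: */
	if (acpi_pstate_strict || (policy_is_shared(policy) &&
	    policy->shared_type != CPUFREQ_SHARED_TYPE_ANY))
		data->cpu_freq_fast_write = NULL;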

In addition to the above, cpufreq_governor_limits() is modified so
that it doesn't call __cpufreq_driver_target() to enforce the new
limits immediately, as they will be taken into account anyway during
the next update from the scheduler.

Signed-off-by: Rafael J. Wysocki <[email protected]>
---

This actually is the first version of the $subject patch, but since it belongs
to the schedutil governor patch series, I've labeled it v2.

Please note that this is a prototype, so it may not be the way I'll
eventually want to do it, although ATM I don't quite see how it might be
done in a significantly different way.  Ideas welcome, however.

It works on my test machine and doesn't even break powertop.

Thanks,
Rafael

---
 drivers/cpufreq/acpi-cpufreq.c      |   63 ++++++++++++++++++++++++++++++++++++
 drivers/cpufreq/cpufreq.c           |   35 ++++++++++++++++++++
 drivers/cpufreq/cpufreq_governor.c  |    8 ----
 drivers/cpufreq/cpufreq_schedutil.c |   20 ++++++++---
 include/linux/cpufreq.h             |    4 ++
 5 files changed, 117 insertions(+), 13 deletions(-)

Index: linux-pm/drivers/cpufreq/acpi-cpufreq.c
===================================================================
--- linux-pm.orig/drivers/cpufreq/acpi-cpufreq.c
+++ linux-pm/drivers/cpufreq/acpi-cpufreq.c
@@ -70,6 +70,7 @@ struct acpi_cpufreq_data {
        unsigned int cpu_feature;
        unsigned int acpi_perf_cpu;
        cpumask_var_t freqdomain_cpus;
+       void (*cpu_freq_fast_write)(u32 val);
 };
 
 /* acpi_perf_data is a pointer to percpu data. */
@@ -243,6 +244,15 @@ static unsigned extract_freq(u32 val, st
        }
 }
 
+void cpu_freq_fast_write_intel(u32 val)
+{
+       u32 lo, hi;
+
+       rdmsr(MSR_IA32_PERF_CTL, lo, hi);
+       lo = (lo & ~INTEL_MSR_RANGE) | (val & INTEL_MSR_RANGE);
+       wrmsr(MSR_IA32_PERF_CTL, lo, hi);
+}
+
 struct msr_addr {
        u32 reg;
 };
@@ -484,6 +494,53 @@ out:
        return result;
 }
 
+unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy,
+                                     unsigned int target_freq)
+{
+       struct acpi_cpufreq_data *data = policy->driver_data;
+       struct cpufreq_frequency_table *entry;
+       struct acpi_processor_performance *perf;
+       unsigned int uninitialized_var(next_perf_state);
+       unsigned int uninitialized_var(next_freq);
+       unsigned int best_diff;
+
+       if (!data->cpu_freq_fast_write)
+               return CPUFREQ_ENTRY_INVALID;
+
+       for (entry = data->freq_table, best_diff = UINT_MAX;
+            entry->frequency != CPUFREQ_TABLE_END; entry++) {
+               unsigned int diff, freq = entry->frequency;
+
+               if (freq == CPUFREQ_ENTRY_INVALID)
+                       continue;
+
+               diff = abs(freq - target_freq);
+               if (diff >= best_diff)
+                       continue;
+
+               best_diff = diff;
+               next_perf_state = entry->driver_data;
+               next_freq = freq;
+               if (best_diff == 0)
+                       goto found;
+       }
+       if (best_diff == UINT_MAX)
+               return CPUFREQ_ENTRY_INVALID;
+
+ found:
+       perf = to_perf_data(data);
+       if (perf->state == next_perf_state) {
+               if (unlikely(data->resume))
+                       data->resume = 0;
+               else
+                       return next_freq;
+       }
+
+       data->cpu_freq_fast_write(perf->states[next_perf_state].control);
+       perf->state = next_perf_state;
+       return next_freq;
+}
+
 static unsigned long
 acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
 {
@@ -745,6 +802,7 @@ static int acpi_cpufreq_cpu_init(struct
                pr_debug("HARDWARE addr space\n");
                if (check_est_cpu(cpu)) {
                        data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
+                       data->cpu_freq_fast_write = cpu_freq_fast_write_intel;
                        break;
                }
                if (check_amd_hwpstate_cpu(cpu)) {
@@ -760,6 +818,10 @@ static int acpi_cpufreq_cpu_init(struct
                goto err_unreg;
        }
 
+       if (acpi_pstate_strict || (policy_is_shared(policy) &&
+           policy->shared_type != CPUFREQ_SHARED_TYPE_ANY))
+               data->cpu_freq_fast_write = NULL;
+
        data->freq_table = kzalloc(sizeof(*data->freq_table) *
                    (perf->state_count+1), GFP_KERNEL);
        if (!data->freq_table) {
@@ -894,6 +956,7 @@ static struct freq_attr *acpi_cpufreq_at
 static struct cpufreq_driver acpi_cpufreq_driver = {
        .verify         = cpufreq_generic_frequency_table_verify,
        .target_index   = acpi_cpufreq_target,
+       .fast_switch    = acpi_cpufreq_fast_switch,
        .bios_limit     = acpi_processor_get_bios_limit,
        .init           = acpi_cpufreq_cpu_init,
        .exit           = acpi_cpufreq_cpu_exit,
Index: linux-pm/include/linux/cpufreq.h
===================================================================
--- linux-pm.orig/include/linux/cpufreq.h
+++ linux-pm/include/linux/cpufreq.h
@@ -271,6 +271,8 @@ struct cpufreq_driver {
                                  unsigned int relation);       /* Deprecated */
        int             (*target_index)(struct cpufreq_policy *policy,
                                        unsigned int index);
+       unsigned int    (*fast_switch)(struct cpufreq_policy *policy,
+                                      unsigned int target_freq);
        /*
         * Only for drivers with target_index() and CPUFREQ_ASYNC_NOTIFICATION
         * unset.
@@ -485,6 +487,8 @@ struct cpufreq_governor {
 };
 
 /* Pass a target to the cpufreq driver */
+bool cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
+                               unsigned int target_freq);
 int cpufreq_driver_target(struct cpufreq_policy *policy,
                                 unsigned int target_freq,
                                 unsigned int relation);
Index: linux-pm/drivers/cpufreq/cpufreq.c
===================================================================
--- linux-pm.orig/drivers/cpufreq/cpufreq.c
+++ linux-pm/drivers/cpufreq/cpufreq.c
@@ -1814,6 +1814,41 @@ EXPORT_SYMBOL(cpufreq_unregister_notifie
  *                              GOVERNORS                            *
  *********************************************************************/
 
+/**
+ * cpufreq_driver_fast_switch - Carry out a fast CPU frequency switch.
+ * @policy: cpufreq policy to switch the frequency for.
+ * @target_freq: New frequency to set (may be approximate).
+ *
+ * Carry out a fast frequency switch from interrupt context.
+ *
+ * It is guaranteed that this function will never be called twice in parallel
+ * for the same policy and that it will not be called in parallel with either
+ * ->target() or ->target_index() for the same policy.
+ *
+ * If CPUFREQ_ENTRY_INVALID is returned by the driver's ->fast_switch()
+ * callback, the hardware configuration must be preserved.
+ *
+ * Return 'true' on success and 'false' on failures.
+ */
+bool cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
+                               unsigned int target_freq)
+{
+       if (target_freq == policy->cur)
+               return true;
+
+       if (cpufreq_driver->fast_switch) {
+               unsigned int freq;
+
+               freq = cpufreq_driver->fast_switch(policy, target_freq);
+               if (freq != CPUFREQ_ENTRY_INVALID) {
+                       policy->cur = freq;
+                       trace_cpu_frequency(freq, smp_processor_id());
+                       return true;
+               }
+       }
+       return false;
+}
+
 /* Must set freqs->new to intermediate frequency */
 static int __target_intermediate(struct cpufreq_policy *policy,
                                 struct cpufreq_freqs *freqs, int index)
Index: linux-pm/drivers/cpufreq/cpufreq_schedutil.c
===================================================================
--- linux-pm.orig/drivers/cpufreq/cpufreq_schedutil.c
+++ linux-pm/drivers/cpufreq/cpufreq_schedutil.c
@@ -83,12 +83,22 @@ static unsigned int sugov_next_freq(stru
 static void sugov_update_commit(struct policy_dbs_info *policy_dbs, u64 time,
                                unsigned int next_freq)
 {
-       struct sugov_policy *sg_policy = to_sg_policy(policy_dbs);
-
-       sg_policy->next_freq = next_freq;
        policy_dbs->last_sample_time = time;
-       policy_dbs->work_in_progress = true;
-       irq_work_queue(&policy_dbs->irq_work);
+
+       if (cpufreq_driver_fast_switch(policy_dbs->policy, next_freq)) {
+               /*
+                * Restore the sample delay in case it has been set to 0
+                * from sysfs in the meantime.
+                */
+               gov_update_sample_delay(policy_dbs,
+                                       policy_dbs->dbs_data->sampling_rate);
+       } else {
+               struct sugov_policy *sg_policy = to_sg_policy(policy_dbs);
+
+               sg_policy->next_freq = next_freq;
+               policy_dbs->work_in_progress = true;
+               irq_work_queue(&policy_dbs->irq_work);
+       }
 }
 
 static void sugov_update_shared(struct update_util_data *data, u64 time,
Index: linux-pm/drivers/cpufreq/cpufreq_governor.c
===================================================================
--- linux-pm.orig/drivers/cpufreq/cpufreq_governor.c
+++ linux-pm/drivers/cpufreq/cpufreq_governor.c
@@ -612,16 +612,8 @@ static int cpufreq_governor_limits(struc
        struct policy_dbs_info *policy_dbs = policy->governor_data;
 
        mutex_lock(&policy_dbs->timer_mutex);
-
-       if (policy->max < policy->cur)
-               __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H);
-       else if (policy->min > policy->cur)
-               __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L);
-
        gov_update_sample_delay(policy_dbs, 0);
-
        mutex_unlock(&policy_dbs->timer_mutex);
-
        return 0;
 }
 
