Ondemand calculates load in terms of frequency and increases it only
if the load_freq is greater than up_threshold multiplied by current
or average frequency. This seems to produce oscillations of frequency
between min and max because, for example, a relatively small load can
easily saturate minimum frequency and lead the CPU to max. Then, the
CPU will decrease back to min due to a small load_freq.

This patch changes the calculation method of load and target frequency
considering 2 points:
- Load computation should be independent from current or average
measured frequency. For example an absolute load 80% at 100MHz is not
necessarily equivalent to 8% at 1000MHz in the next sampling interval.
- Target frequency should be increased to any value of frequency table
proportional to absolute load, instead to only the max.

The patch also removes the unused code as a result of the above changes.

Tested on Intel i7-3770 CPU @ 3.40GHz and on Quad core 1500MHz Krait.
Phoronix benchmark of Linux Kernel Compilation 3.1 test shows an
increase ~1.5% in performance. cpufreq_stats (time_in_state) shows
that middle frequencies are used more, with this patch. Highest
and lowest frequencies were used less by ~9%

Signed-off-by: Stratos Karafotis <strat...@semaphore.gr>
---
 arch/x86/include/asm/processor.h   | 29 ----------------------
 drivers/cpufreq/Makefile           |  2 +-
 drivers/cpufreq/acpi-cpufreq.c     |  5 ----
 drivers/cpufreq/cpufreq.c          | 21 ----------------
 drivers/cpufreq/cpufreq_governor.c | 10 +-------
 drivers/cpufreq/cpufreq_governor.h |  1 -
 drivers/cpufreq/cpufreq_ondemand.c | 39 ++++++-----------------------
 drivers/cpufreq/mperf.c            | 51 --------------------------------------
 drivers/cpufreq/mperf.h            |  9 -------
 include/linux/cpufreq.h            |  6 -----
 10 files changed, 9 insertions(+), 164 deletions(-)
 delete mode 100644 drivers/cpufreq/mperf.c
 delete mode 100644 drivers/cpufreq/mperf.h

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 22224b3..2874a3b 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -941,35 +941,6 @@ extern int set_tsc_mode(unsigned int val);
 
 extern u16 amd_get_nb_id(int cpu);
 
-struct aperfmperf {
-       u64 aperf, mperf;
-};
-
-static inline void get_aperfmperf(struct aperfmperf *am)
-{
-       WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_APERFMPERF));
-
-       rdmsrl(MSR_IA32_APERF, am->aperf);
-       rdmsrl(MSR_IA32_MPERF, am->mperf);
-}
-
-#define APERFMPERF_SHIFT 10
-
-static inline
-unsigned long calc_aperfmperf_ratio(struct aperfmperf *old,
-                                   struct aperfmperf *new)
-{
-       u64 aperf = new->aperf - old->aperf;
-       u64 mperf = new->mperf - old->mperf;
-       unsigned long ratio = aperf;
-
-       mperf >>= APERFMPERF_SHIFT;
-       if (mperf)
-               ratio = div64_u64(aperf, mperf);
-
-       return ratio;
-}
-
 extern unsigned long arch_align_stack(unsigned long sp);
 extern void free_init_pages(char *what, unsigned long begin, unsigned long 
end);
 
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 315b923..4519fb1 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -23,7 +23,7 @@ obj-$(CONFIG_GENERIC_CPUFREQ_CPU0)    += cpufreq-cpu0.o
 # powernow-k8 can load then. ACPI is preferred to all other hardware-specific 
drivers.
 # speedstep-* is preferred over p4-clockmod.
 
-obj-$(CONFIG_X86_ACPI_CPUFREQ)         += acpi-cpufreq.o mperf.o
+obj-$(CONFIG_X86_ACPI_CPUFREQ)         += acpi-cpufreq.o
 obj-$(CONFIG_X86_POWERNOW_K8)          += powernow-k8.o
 obj-$(CONFIG_X86_PCC_CPUFREQ)          += pcc-cpufreq.o
 obj-$(CONFIG_X86_POWERNOW_K6)          += powernow-k6.o
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 11b8b4b..0025cdd 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -45,7 +45,6 @@
 #include <asm/msr.h>
 #include <asm/processor.h>
 #include <asm/cpufeature.h>
-#include "mperf.h"
 
 MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski");
 MODULE_DESCRIPTION("ACPI Processor P-States Driver");
@@ -842,10 +841,6 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy 
*policy)
        /* notify BIOS that we exist */
        acpi_processor_notify_smm(THIS_MODULE);
 
-       /* Check for APERF/MPERF support in hardware */
-       if (boot_cpu_has(X86_FEATURE_APERFMPERF))
-               acpi_cpufreq_driver.getavg = cpufreq_get_measured_perf;
-
        pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
        for (i = 0; i < perf->state_count; i++)
                pr_debug("     %cP%d: %d MHz, %d mW, %d uS\n",
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 2d53f47..04ceddc 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1502,27 +1502,6 @@ no_policy:
 }
 EXPORT_SYMBOL_GPL(cpufreq_driver_target);
 
-int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu)
-{
-       int ret = 0;
-
-       if (cpufreq_disabled())
-               return ret;
-
-       if (!cpufreq_driver->getavg)
-               return 0;
-
-       policy = cpufreq_cpu_get(policy->cpu);
-       if (!policy)
-               return -EINVAL;
-
-       ret = cpufreq_driver->getavg(policy, cpu);
-
-       cpufreq_cpu_put(policy);
-       return ret;
-}
-EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);
-
 /*
  * when "event" is CPUFREQ_GOV_LIMITS
  */
diff --git a/drivers/cpufreq/cpufreq_governor.c 
b/drivers/cpufreq/cpufreq_governor.c
index 5af40ad..eeab30a 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -97,7 +97,7 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
 
        policy = cdbs->cur_policy;
 
-       /* Get Absolute Load (in terms of freq for ondemand gov) */
+       /* Get Absolute Load */
        for_each_cpu(j, policy->cpus) {
                struct cpu_dbs_common_info *j_cdbs;
                u64 cur_wall_time, cur_idle_time;
@@ -148,14 +148,6 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
 
                load = 100 * (wall_time - idle_time) / wall_time;
 
-               if (dbs_data->cdata->governor == GOV_ONDEMAND) {
-                       int freq_avg = __cpufreq_driver_getavg(policy, j);
-                       if (freq_avg <= 0)
-                               freq_avg = policy->cur;
-
-                       load *= freq_avg;
-               }
-
                if (load > max_load)
                        max_load = load;
        }
diff --git a/drivers/cpufreq/cpufreq_governor.h 
b/drivers/cpufreq/cpufreq_governor.h
index e16a961..e899c11 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -169,7 +169,6 @@ struct od_dbs_tuners {
        unsigned int sampling_rate;
        unsigned int sampling_down_factor;
        unsigned int up_threshold;
-       unsigned int adj_up_threshold;
        unsigned int powersave_bias;
        unsigned int io_is_busy;
 };
diff --git a/drivers/cpufreq/cpufreq_ondemand.c 
b/drivers/cpufreq/cpufreq_ondemand.c
index 4b9bb5d..c1e6d3e 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -29,11 +29,9 @@
 #include "cpufreq_governor.h"
 
 /* On-demand governor macros */
-#define DEF_FREQUENCY_DOWN_DIFFERENTIAL                (10)
 #define DEF_FREQUENCY_UP_THRESHOLD             (80)
 #define DEF_SAMPLING_DOWN_FACTOR               (1)
 #define MAX_SAMPLING_DOWN_FACTOR               (100000)
-#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL      (3)
 #define MICRO_FREQUENCY_UP_THRESHOLD           (95)
 #define MICRO_FREQUENCY_MIN_SAMPLE_RATE                (10000)
 #define MIN_FREQUENCY_UP_THRESHOLD             (11)
@@ -159,14 +157,10 @@ static void dbs_freq_increase(struct cpufreq_policy *p, 
unsigned int freq)
 
 /*
  * Every sampling_rate, we check, if current idle time is less than 20%
- * (default), then we try to increase frequency. Every sampling_rate, we look
- * for the lowest frequency which can sustain the load while keeping idle time
- * over 30%. If such a frequency exist, we try to decrease to this frequency.
- *
- * Any frequency increase takes it to the maximum frequency. Frequency 
reduction
- * happens at minimum steps of 5% (default) of current frequency
+ * (default), then we try to increase frequency. Else, we adjust the frequency
+ * proportional to load.
  */
-static void od_check_cpu(int cpu, unsigned int load_freq)
+static void od_check_cpu(int cpu, unsigned int load)
 {
        struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
        struct cpufreq_policy *policy = dbs_info->cdbs.cur_policy;
@@ -176,29 +170,17 @@ static void od_check_cpu(int cpu, unsigned int load_freq)
        dbs_info->freq_lo = 0;
 
        /* Check for frequency increase */
-       if (load_freq > od_tuners->up_threshold * policy->cur) {
+       if (load > od_tuners->up_threshold) {
                /* If switching to max speed, apply sampling_down_factor */
                if (policy->cur < policy->max)
                        dbs_info->rate_mult =
                                od_tuners->sampling_down_factor;
                dbs_freq_increase(policy, policy->max);
                return;
-       }
-
-       /* Check for frequency decrease */
-       /* if we cannot reduce the frequency anymore, break out early */
-       if (policy->cur == policy->min)
-               return;
-
-       /*
-        * The optimal frequency is the frequency that is the lowest that can
-        * support the current CPU usage without triggering the up policy. To be
-        * safe, we focus 10 points under the threshold.
-        */
-       if (load_freq < od_tuners->adj_up_threshold
-                       * policy->cur) {
+       } else {
+               /* Calculate the next frequency proportional to load */
                unsigned int freq_next;
-               freq_next = load_freq / od_tuners->adj_up_threshold;
+               freq_next = load * policy->max / 100;
 
                /* No longer fully busy, reset rate_mult */
                dbs_info->rate_mult = 1;
@@ -372,9 +354,6 @@ static ssize_t store_up_threshold(struct dbs_data 
*dbs_data, const char *buf,
                        input < MIN_FREQUENCY_UP_THRESHOLD) {
                return -EINVAL;
        }
-       /* Calculate the new adj_up_threshold */
-       od_tuners->adj_up_threshold += input;
-       od_tuners->adj_up_threshold -= od_tuners->up_threshold;
 
        od_tuners->up_threshold = input;
        return count;
@@ -523,8 +502,6 @@ static int od_init(struct dbs_data *dbs_data)
        if (idle_time != -1ULL) {
                /* Idle micro accounting is supported. Use finer thresholds */
                tuners->up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
-               tuners->adj_up_threshold = MICRO_FREQUENCY_UP_THRESHOLD -
-                       MICRO_FREQUENCY_DOWN_DIFFERENTIAL;
                /*
                 * In nohz/micro accounting case we set the minimum frequency
                 * not depending on HZ, but fixed (very low). The deferred
@@ -533,8 +510,6 @@ static int od_init(struct dbs_data *dbs_data)
                dbs_data->min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE;
        } else {
                tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD;
-               tuners->adj_up_threshold = DEF_FREQUENCY_UP_THRESHOLD -
-                       DEF_FREQUENCY_DOWN_DIFFERENTIAL;
 
                /* For correct statistics, we need 10 ticks for each measure */
                dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO *
diff --git a/drivers/cpufreq/mperf.c b/drivers/cpufreq/mperf.c
deleted file mode 100644
index 911e193..0000000
--- a/drivers/cpufreq/mperf.c
+++ /dev/null
@@ -1,51 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/smp.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/slab.h>
-
-#include "mperf.h"
-
-static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf);
-
-/* Called via smp_call_function_single(), on the target CPU */
-static void read_measured_perf_ctrs(void *_cur)
-{
-       struct aperfmperf *am = _cur;
-
-       get_aperfmperf(am);
-}
-
-/*
- * Return the measured active (C0) frequency on this CPU since last call
- * to this function.
- * Input: cpu number
- * Return: Average CPU frequency in terms of max frequency (zero on error)
- *
- * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance
- * over a period of time, while CPU is in C0 state.
- * IA32_MPERF counts at the rate of max advertised frequency
- * IA32_APERF counts at the rate of actual CPU frequency
- * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and
- * no meaning should be associated with absolute values of these MSRs.
- */
-unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy,
-                                       unsigned int cpu)
-{
-       struct aperfmperf perf;
-       unsigned long ratio;
-       unsigned int retval;
-
-       if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1))
-               return 0;
-
-       ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf);
-       per_cpu(acfreq_old_perf, cpu) = perf;
-
-       retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT;
-
-       return retval;
-}
-EXPORT_SYMBOL_GPL(cpufreq_get_measured_perf);
-MODULE_LICENSE("GPL");
diff --git a/drivers/cpufreq/mperf.h b/drivers/cpufreq/mperf.h
deleted file mode 100644
index 5dbf295..0000000
--- a/drivers/cpufreq/mperf.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- *  (c) 2010 Advanced Micro Devices, Inc.
- *  Your use of this code is subject to the terms and conditions of the
- *  GNU general public license version 2. See "COPYING" or
- *  http://www.gnu.org/licenses/gpl.html
- */
-
-unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy,
-                                       unsigned int cpu);
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 037d36a..60fcb42 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -211,10 +211,6 @@ extern int __cpufreq_driver_target(struct cpufreq_policy 
*policy,
                                   unsigned int target_freq,
                                   unsigned int relation);
 
-
-extern int __cpufreq_driver_getavg(struct cpufreq_policy *policy,
-                                  unsigned int cpu);
-
 int cpufreq_register_governor(struct cpufreq_governor *governor);
 void cpufreq_unregister_governor(struct cpufreq_governor *governor);
 
@@ -254,8 +250,6 @@ struct cpufreq_driver {
        unsigned int    (*get)  (unsigned int cpu);
 
        /* optional */
-       unsigned int (*getavg)  (struct cpufreq_policy *policy,
-                                unsigned int cpu);
        int     (*bios_limit)   (int cpu, unsigned int *limit);
 
        int     (*exit)         (struct cpufreq_policy *policy);
-- 
1.8.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to