Gitweb:     http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=dfde5d62ed9b28b0bda676c16e8cb635df244ef2
Commit:     dfde5d62ed9b28b0bda676c16e8cb635df244ef2
Parent:     a6f6e6e6ab464c9d1dff66570b78be2f66d8ba3d
Author:     Venkatesh Pallipadi <[EMAIL PROTECTED]>
AuthorDate: Tue Oct 3 12:38:45 2006 -0700
Committer:  Dave Jones <[EMAIL PROTECTED]>
CommitDate: Sun Oct 15 19:57:11 2006 -0400

    [CPUFREQ][8/8] acpi-cpufreq: Add support for freq feedback from hardware
    
    Enable ondemand governor and acpi-cpufreq to use IA32_APERF and IA32_MPERF MSR
    to get active frequency feedback for the last sampling interval. This will
    make ondemand take right frequency decisions when hardware coordination of
    frequency is going on.
    
    Without APERF/MPERF, ondemand can take wrong decision at times due
    to underlying hardware coordination or TM2.
    Example:
    * CPU 0 and CPU 1 are hardware coordinated.
    * CPU 1 running at highest frequency.
    * CPU 0 was running at highest freq. Now ondemand reduces it to
      some intermediate frequency based on utilization.
    * Due to underlying hardware coordination with other CPU 1, CPU 0 continues to
      run at highest frequency (as long as other CPU is at highest).
    * When ondemand samples CPU 0 again next time, without actual frequency
      feedback from APERF/MPERF, it will think that previous frequency change
      was successful and can go to wrong target frequency. This is because it
      thinks that utilization it has got this sampling interval is when running at
      intermediate frequency, rather than actual highest frequency.
    
    More information about IA32_APERF IA32_MPERF MSR:
    Refer to IA-32 Intel® Architecture Software Developer's Manual at
    http://developer.intel.com
    
    Signed-off-by: Venkatesh Pallipadi <[EMAIL PROTECTED]>
    Signed-off-by: Dave Jones <[EMAIL PROTECTED]>
---
 arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c |  107 ++++++++++++++++++++++++++-
 drivers/cpufreq/cpufreq.c                   |   20 +++++
 drivers/cpufreq/cpufreq_ondemand.c          |    9 ++-
 include/asm-i386/msr.h                      |    3 +
 include/asm-x86_64/msr.h                    |    3 +
 include/linux/cpufreq.h                     |    3 +
 6 files changed, 143 insertions(+), 2 deletions(-)

diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
index 8b0c7db..f8a8e46 100644
--- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -58,10 +58,12 @@ enum {
 };
 
 #define INTEL_MSR_RANGE                (0xffff)
+#define CPUID_6_ECX_APERFMPERF_CAPABILITY      (0x1)
 
 struct acpi_cpufreq_data {
        struct acpi_processor_performance *acpi_data;
        struct cpufreq_frequency_table *freq_table;
+       unsigned int max_freq;
        unsigned int resume;
        unsigned int cpu_feature;
 };
@@ -258,6 +260,100 @@ static u32 get_cur_val(cpumask_t mask)
        return cmd.val;
 }
 
+/*
+ * Return the measured active (C0) frequency on this CPU since last call
+ * to this function.
+ * Input: cpu number
+ * Return: Average CPU frequency in terms of max frequency (zero on error)
+ *
+ * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance
+ * over a period of time, while CPU is in C0 state.
+ * IA32_MPERF counts at the rate of max advertised frequency
+ * IA32_APERF counts at the rate of actual CPU frequency
+ * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and
+ * no meaning should be associated with absolute values of these MSRs.
+ */
+static unsigned int get_measured_perf(unsigned int cpu)
+{
+       union {
+               struct {
+                       u32 lo;
+                       u32 hi;
+               } split;
+               u64 whole;
+       } aperf_cur, mperf_cur;
+
+       cpumask_t saved_mask;
+       unsigned int perf_percent;
+       unsigned int retval;
+
+       saved_mask = current->cpus_allowed;
+       set_cpus_allowed(current, cpumask_of_cpu(cpu));
+       if (get_cpu() != cpu) {
+               /* We were not able to run on requested processor */
+               put_cpu();
+               return 0;
+       }
+
+       rdmsr(MSR_IA32_APERF, aperf_cur.split.lo, aperf_cur.split.hi);
+       rdmsr(MSR_IA32_MPERF, mperf_cur.split.lo, mperf_cur.split.hi);
+
+       wrmsr(MSR_IA32_APERF, 0,0);
+       wrmsr(MSR_IA32_MPERF, 0,0);
+
+#ifdef __i386__
+       /*
+        * We dont want to do 64 bit divide with 32 bit kernel
+        * Get an approximate value. Return failure in case we cannot get
+        * an approximate value.
+        */
+       if (unlikely(aperf_cur.split.hi || mperf_cur.split.hi)) {
+               int shift_count;
+               u32 h;
+
+               h = max_t(u32, aperf_cur.split.hi, mperf_cur.split.hi);
+               shift_count = fls(h);
+
+               aperf_cur.whole >>= shift_count;
+               mperf_cur.whole >>= shift_count;
+       }
+
+       if (((unsigned long)(-1) / 100) < aperf_cur.split.lo) {
+               int shift_count = 7;
+               aperf_cur.split.lo >>= shift_count;
+               mperf_cur.split.lo >>= shift_count;
+       }
+
+       if (aperf_cur.split.lo && mperf_cur.split.lo) {
+               perf_percent = (aperf_cur.split.lo * 100) / mperf_cur.split.lo;
+       } else {
+               perf_percent = 0;
+       }
+
+#else
+       if (unlikely(((unsigned long)(-1) / 100) < aperf_cur.whole)) {
+               int shift_count = 7;
+               aperf_cur.whole >>= shift_count;
+               mperf_cur.whole >>= shift_count;
+       }
+
+       if (aperf_cur.whole && mperf_cur.whole) {
+               perf_percent = (aperf_cur.whole * 100) / mperf_cur.whole;
+       } else {
+               perf_percent = 0;
+       }
+
+#endif
+
+       retval = drv_data[cpu]->max_freq * perf_percent / 100;
+
+       put_cpu();
+       set_cpus_allowed(current, saved_mask);
+
+       dprintk("cpu %d: performance percent %d\n", cpu, perf_percent);
+       return retval;
+}
+
 static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 {
        struct acpi_cpufreq_data *data = drv_data[cpu];
@@ -497,7 +593,6 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
        unsigned int valid_states = 0;
        unsigned int cpu = policy->cpu;
        struct acpi_cpufreq_data *data;
-       unsigned int l, h;
        unsigned int result = 0;
        struct cpuinfo_x86 *c = &cpu_data[policy->cpu];
        struct acpi_processor_performance *perf;
@@ -591,6 +686,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
        }
        policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
 
+       data->max_freq = perf->states[0].core_frequency * 1000;
        /* table init */
        for (i = 0; i < perf->state_count; i++) {
                if (i > 0 && perf->states[i].core_frequency ==
@@ -625,6 +721,15 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
        /* notify BIOS that we exist */
        acpi_processor_notify_smm(THIS_MODULE);
 
+       /* Check for APERF/MPERF support in hardware */
+       if (c->x86_vendor == X86_VENDOR_INTEL && c->cpuid_level >= 6) {
+               unsigned int ecx;
+               ecx = cpuid_ecx(6);
+               if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY) {
+                       acpi_cpufreq_driver.getavg = get_measured_perf;
+               }
+       }
+
        dprintk("CPU%u - ACPI performance management activated.\n", cpu);
        for (i = 0; i < perf->state_count; i++)
                dprintk("     %cP%d: %d MHz, %d mW, %d uS\n",
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 86e69b7..56c433e 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1274,6 +1274,26 @@ int cpufreq_driver_target(struct cpufreq_policy *policy,
 }
 EXPORT_SYMBOL_GPL(cpufreq_driver_target);
 
+int cpufreq_driver_getavg(struct cpufreq_policy *policy)
+{
+       int ret = 0;
+
+       policy = cpufreq_cpu_get(policy->cpu);
+       if (!policy)
+               return -EINVAL;
+
+       mutex_lock(&policy->lock);
+
+       if (cpu_online(policy->cpu) && cpufreq_driver->getavg)
+               ret = cpufreq_driver->getavg(policy->cpu);
+
+       mutex_unlock(&policy->lock);
+
+       cpufreq_cpu_put(policy);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(cpufreq_driver_getavg);
+
 /*
  * Locking: Must be called with the lock_cpu_hotplug() lock held
  * when "event" is CPUFREQ_GOV_LIMITS
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index bf8aa45..291cfe9 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -393,8 +393,15 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
         * policy. To be safe, we focus 10 points under the threshold.
         */
        if (load < (dbs_tuners_ins.up_threshold - 10)) {
-               unsigned int freq_next = (policy->cur * load) /
+               unsigned int freq_next, freq_cur;
+
+               freq_cur = cpufreq_driver_getavg(policy);
+               if (!freq_cur)
+                       freq_cur = policy->cur;
+
+               freq_next = (freq_cur * load) /
                        (dbs_tuners_ins.up_threshold - 10);
+
                if (!dbs_tuners_ins.powersave_bias) {
                        __cpufreq_driver_target(policy, freq_next,
                                        CPUFREQ_RELATION_L);
diff --git a/include/asm-i386/msr.h b/include/asm-i386/msr.h
index 62b76cd..0aa15fc 100644
--- a/include/asm-i386/msr.h
+++ b/include/asm-i386/msr.h
@@ -125,6 +125,9 @@ static inline void wrmsrl (unsigned long msr, unsigned long long val)
 #define MSR_IA32_PERF_STATUS           0x198
 #define MSR_IA32_PERF_CTL              0x199
 
+#define MSR_IA32_MPERF                 0xE7
+#define MSR_IA32_APERF                 0xE8
+
 #define MSR_IA32_THERM_CONTROL         0x19a
 #define MSR_IA32_THERM_INTERRUPT       0x19b
 #define MSR_IA32_THERM_STATUS          0x19c
diff --git a/include/asm-x86_64/msr.h b/include/asm-x86_64/msr.h
index 37e1941..e615822 100644
--- a/include/asm-x86_64/msr.h
+++ b/include/asm-x86_64/msr.h
@@ -307,6 +307,9 @@ static inline unsigned int cpuid_edx(unsigned int op)
 #define MSR_IA32_PERF_STATUS           0x198
 #define MSR_IA32_PERF_CTL              0x199
 
+#define MSR_IA32_MPERF                 0xE7
+#define MSR_IA32_APERF                 0xE8
+
 #define MSR_IA32_THERM_CONTROL         0x19a
 #define MSR_IA32_THERM_INTERRUPT       0x19b
 #define MSR_IA32_THERM_STATUS          0x19c
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 4ea39fe..7f008f6 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -172,6 +172,8 @@ extern int __cpufreq_driver_target(struct cpufreq_policy *policy,
                                   unsigned int relation);
 
 
+extern int cpufreq_driver_getavg(struct cpufreq_policy *policy);
+
 int cpufreq_register_governor(struct cpufreq_governor *governor);
 void cpufreq_unregister_governor(struct cpufreq_governor *governor);
 
@@ -204,6 +206,7 @@ struct cpufreq_driver {
        unsigned int    (*get)  (unsigned int cpu);
 
        /* optional */
+       unsigned int (*getavg)  (unsigned int cpu);
        int     (*exit)         (struct cpufreq_policy *policy);
        int     (*suspend)      (struct cpufreq_policy *policy, pm_message_t 
pmsg);
        int     (*resume)       (struct cpufreq_policy *policy);
-
To unsubscribe from this list: send the line "unsubscribe git-commits-head" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to