From: Vaidyanathan Srinivasan <sva...@linux.vnet.ibm.com>

Backend driver to dynamically set voltage and frequency on
IBM POWER non-virtualized platforms.  Power management SPRs
are used to set the required PState.

This driver works in conjunction with cpufreq governors
like 'ondemand' to provide demand-based frequency and
voltage setting on IBM POWER non-virtualized platforms.

The PState table is obtained from OPAL v3 firmware through the
device tree.

The powernv-cpufreq back-end driver parses the relevant device-tree
nodes and initialises the cpufreq subsystem on the powernv platform.

Signed-off-by: Vaidyanathan Srinivasan <sva...@linux.vnet.ibm.com>
Signed-off-by: Srivatsa S. Bhat <srivatsa.b...@linux.vnet.ibm.com>
Signed-off-by: Anton Blanchard <an...@samba.org>
Signed-off-by: Gautham R. Shenoy <e...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/reg.h         |   4 +
 arch/powerpc/platforms/powernv/Kconfig |   1 +
 drivers/cpufreq/Kconfig                |   1 +
 drivers/cpufreq/Kconfig.powerpc        |  13 ++
 drivers/cpufreq/Makefile               |   1 +
 drivers/cpufreq/powernv-cpufreq.c      | 277 +++++++++++++++++++++++++++++++++
 6 files changed, 297 insertions(+)
 create mode 100644 drivers/cpufreq/powernv-cpufreq.c

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 90c06ec..84f92ca 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -271,6 +271,10 @@
 #define SPRN_HSRR1     0x13B   /* Hypervisor Save/Restore 1 */
 #define SPRN_IC                0x350   /* Virtual Instruction Count */
 #define SPRN_VTB       0x351   /* Virtual Time Base */
+#define SPRN_PMICR     0x354   /* Power Management Idle Control Reg */
+#define SPRN_PMSR      0x355   /* Power Management Status Reg */
+#define SPRN_PMCR      0x374   /* Power Management Control Register */
+
 /* HFSCR and FSCR bit numbers are the same */
 #define FSCR_TAR_LG    8       /* Enable Target Address Register */
 #define FSCR_EBB_LG    7       /* Enable Event Based Branching */
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index 895e8a2..1fe12b1 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -11,6 +11,7 @@ config PPC_POWERNV
        select PPC_UDBG_16550
        select PPC_SCOM
        select ARCH_RANDOM
+       select CPU_FREQ
        default y
 
 config PPC_POWERNV_RTAS
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 4b029c0..4ba1632 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -48,6 +48,7 @@ config CPU_FREQ_STAT_DETAILS
 choice
        prompt "Default CPUFreq governor"
        default CPU_FREQ_DEFAULT_GOV_USERSPACE if ARM_SA1100_CPUFREQ || ARM_SA1110_CPUFREQ
+       default CPU_FREQ_DEFAULT_GOV_ONDEMAND if POWERNV_CPUFREQ
        default CPU_FREQ_DEFAULT_GOV_PERFORMANCE
        help
          This option sets which CPUFreq governor shall be loaded at
diff --git a/drivers/cpufreq/Kconfig.powerpc b/drivers/cpufreq/Kconfig.powerpc
index ca0021a..93f8689 100644
--- a/drivers/cpufreq/Kconfig.powerpc
+++ b/drivers/cpufreq/Kconfig.powerpc
@@ -54,3 +54,16 @@ config PPC_PASEMI_CPUFREQ
        help
          This adds the support for frequency switching on PA Semi
          PWRficient processors.
+
+config POWERNV_CPUFREQ
+       tristate "CPU frequency scaling for IBM POWERNV platform"
+       depends on PPC_POWERNV
+       select CPU_FREQ_GOV_PERFORMANCE
+       select CPU_FREQ_GOV_POWERSAVE
+       select CPU_FREQ_GOV_USERSPACE
+       select CPU_FREQ_GOV_ONDEMAND
+       select CPU_FREQ_GOV_CONSERVATIVE
+       default y
+       help
+        This adds support for CPU frequency switching on the IBM
+        POWERNV platform.
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 7494565..0dbb963 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -86,6 +86,7 @@ obj-$(CONFIG_PPC_CORENET_CPUFREQ)   += ppc-corenet-cpufreq.o
 obj-$(CONFIG_CPU_FREQ_PMAC)            += pmac32-cpufreq.o
 obj-$(CONFIG_CPU_FREQ_PMAC64)          += pmac64-cpufreq.o
 obj-$(CONFIG_PPC_PASEMI_CPUFREQ)       += pasemi-cpufreq.o
+obj-$(CONFIG_POWERNV_CPUFREQ)          += powernv-cpufreq.o
 
 
##################################################################################
 # Other platform drivers
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
new file mode 100644
index 0000000..ab1551f
--- /dev/null
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -0,0 +1,277 @@
+/*
+ * POWERNV cpufreq driver for the IBM POWER processors
+ *
+ * (C) Copyright IBM 2014
+ *
+ * Author: Vaidyanathan Srinivasan <svaidy at linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#define pr_fmt(fmt)    "powernv-cpufreq: " fmt
+
+#include <linux/module.h>
+#include <linux/cpufreq.h>
+#include <linux/of.h>
+#include <asm/cputhreads.h>
+
+/*
+ * Held by powernv_cpufreq_target() around each frequency transition.
+ * FIXME: Make this per-core
+ */
+static DEFINE_MUTEX(freq_switch_mutex);
+
+/* Largest number of PStates the tables below can hold */
+#define POWERNV_MAX_PSTATES    256
+
+/*
+ * powernv_pstate_ids[i] is the PState id that belongs to powernv_freqs[i].
+ * Both tables carry one extra slot; in powernv_freqs it holds the
+ * CPUFREQ_TABLE_END marker written by init_powernv_pstates().
+ */
+static int powernv_pstate_ids[POWERNV_MAX_PSTATES + 1];
+static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES + 1];
+
+/*
+ * Initialize the freq table based on data obtained
+ * from the firmware passed via device-tree
+ *
+ * Reads /ibm,opal/power-mgt and fills powernv_freqs[] (frequency in kHz,
+ * driver_data = table index) and powernv_pstate_ids[] (PState id for the
+ * same index), then terminates powernv_freqs[] with CPUFREQ_TABLE_END.
+ *
+ * Returns 0 on success, or -ENODEV when the node or one of the required
+ * properties is missing, or when the firmware lists no PStates.
+ */
+static int init_powernv_pstates(void)
+{
+       struct device_node *power_mgt;
+       int nr_pstates = 0;
+       int pstate_min, pstate_max, pstate_nominal;
+       const __be32 *pstate_ids, *pstate_freqs;
+       int i;
+       u32 len_ids, len_freqs;
+       int rc = -ENODEV;
+
+       power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
+       if (!power_mgt) {
+               pr_warn("power-mgt node not found\n");
+               return -ENODEV;
+       }
+
+       /*
+        * From here on every exit goes through "out" so that the node
+        * reference taken by of_find_node_by_path() is dropped.
+        */
+       if (of_property_read_u32(power_mgt, "ibm,pstate-min", &pstate_min)) {
+               pr_warn("ibm,pstate-min node not found\n");
+               goto out;
+       }
+
+       if (of_property_read_u32(power_mgt, "ibm,pstate-max", &pstate_max)) {
+               pr_warn("ibm,pstate-max node not found\n");
+               goto out;
+       }
+
+       if (of_property_read_u32(power_mgt, "ibm,pstate-nominal",
+                                &pstate_nominal)) {
+               pr_warn("ibm,pstate-nominal not found\n");
+               goto out;
+       }
+       pr_info("cpufreq pstate min %d nominal %d max %d\n", pstate_min,
+               pstate_nominal, pstate_max);
+
+       pstate_ids = of_get_property(power_mgt, "ibm,pstate-ids", &len_ids);
+       if (!pstate_ids) {
+               pr_warn("ibm,pstate-ids not found\n");
+               goto out;
+       }
+
+       pstate_freqs = of_get_property(power_mgt, "ibm,pstate-frequencies-mhz",
+                                     &len_freqs);
+       if (!pstate_freqs) {
+               pr_warn("ibm,pstate-frequencies-mhz not found\n");
+               goto out;
+       }
+
+       /* Both properties are arrays of 32-bit cells; use the shorter one */
+       WARN_ON(len_ids != len_freqs);
+       nr_pstates = min(len_ids, len_freqs) / sizeof(u32);
+       if (!nr_pstates) {
+               /* An empty table is useless to cpufreq; do not register */
+               pr_warn("No PStates found\n");
+               goto out;
+       }
+
+       /* Never write past the statically sized tables */
+       if (nr_pstates > POWERNV_MAX_PSTATES) {
+               pr_warn("%d PStates found, only the first %d are used\n",
+                       nr_pstates, POWERNV_MAX_PSTATES);
+               nr_pstates = POWERNV_MAX_PSTATES;
+       }
+
+       pr_debug("NR PStates %d\n", nr_pstates);
+       for (i = 0; i < nr_pstates; i++) {
+               u32 id = be32_to_cpu(pstate_ids[i]);
+               u32 freq = be32_to_cpu(pstate_freqs[i]);
+
+               pr_debug("PState id %d freq %d MHz\n", id, freq);
+               powernv_freqs[i].driver_data = i;
+               powernv_freqs[i].frequency = freq * 1000; /* kHz */
+               powernv_pstate_ids[i] = id;
+       }
+       /* End of list marker entry */
+       powernv_freqs[i].driver_data = 0;
+       powernv_freqs[i].frequency = CPUFREQ_TABLE_END;
+
+       /* Print frequency table */
+       for (i = 0; powernv_freqs[i].frequency != CPUFREQ_TABLE_END; i++)
+               pr_debug("%d: %d\n", i, powernv_freqs[i].frequency);
+
+       rc = 0;
+out:
+       /* The property pointers above are not used past this point */
+       of_node_put(power_mgt);
+       return rc;
+}
+
+/*
+ * NULL-terminated attribute list handed to the cpufreq core through
+ * powernv_cpufreq_driver.attr.
+ */
+static struct freq_attr *powernv_cpu_freq_attr[] = {
+       &cpufreq_freq_attr_scaling_available_freqs,
+       NULL,
+};
+
+/* Helper routines */
+
+/* Access helpers to power mgt SPR */
+
+/*
+ * Read one of the power management SPRs (PMCR, PMICR or PMSR).
+ * Any other SPR number is a caller bug and hits BUG().
+ * NOTE(review): each case names the SPR literally, presumably because
+ * mfspr() needs a compile-time constant - confirm against asm/reg.h.
+ */
+static inline unsigned long get_pmspr(unsigned long sprn)
+{
+       if (sprn == SPRN_PMCR)
+               return mfspr(SPRN_PMCR);
+
+       if (sprn == SPRN_PMICR)
+               return mfspr(SPRN_PMICR);
+
+       if (sprn == SPRN_PMSR)
+               return mfspr(SPRN_PMSR);
+
+       BUG();
+}
+
+/*
+ * Write @val to one of the power management SPRs (PMCR, PMICR or PMSR).
+ * Any other SPR number is a caller bug and hits BUG().
+ */
+static inline void set_pmspr(unsigned long sprn, unsigned long val)
+{
+       switch (sprn) {
+       case SPRN_PMCR:
+               mtspr(SPRN_PMCR, val);
+               break;
+
+       case SPRN_PMICR:
+               mtspr(SPRN_PMICR, val);
+               break;
+
+       case SPRN_PMSR:
+               mtspr(SPRN_PMSR, val);
+               break;
+
+       default:
+               BUG();
+       }
+}
+
+/*
+ * Program the PState pointed to by @pstate (an unsigned long) into the
+ * PMCR of the CPU this function runs on.  Called through
+ * smp_call_function_any() from powernv_set_freq().
+ */
+static void set_pstate(void *pstate)
+{
+       unsigned long id = *(unsigned long *) pstate;
+       unsigned long pmcr = get_pmspr(SPRN_PMCR);
+
+       /* Clear the two PState fields, keep the rest of the register */
+       pmcr &= 0x0000ffffffffffffULL;
+       /* Set both global(bits 56..63) and local(bits 48..55) PStates */
+       pmcr |= (id << 56) | (id << 48);
+
+       pr_debug("Setting cpu %d pmcr to %016lX\n", smp_processor_id(), pmcr);
+       set_pmspr(SPRN_PMCR, pmcr);
+}
+
+/*
+ * Switch one CPU out of @cpus to the PState stored at table index
+ * @new_index.  Always returns 0.
+ */
+static int powernv_set_freq(cpumask_var_t cpus, unsigned int new_index)
+{
+       unsigned long pstate_id;
+
+       /* Only the low byte of the PState id is written to the PMCR */
+       pstate_id = (unsigned long) powernv_pstate_ids[new_index];
+       pstate_id &= 0xFF;
+
+       /*
+        * Use smp_call_function to send IPI and execute the
+        * mtspr on target cpu.  We could do that without IPI
+        * if current CPU is within policy->cpus (core)
+        *
+        * The call waits for completion (last argument is 1), so passing
+        * the address of the on-stack pstate_id is fine.
+        */
+       smp_call_function_any(cpus, set_pstate, &pstate_id, 1);
+
+       return 0;
+}
+
+/*
+ * cpufreq ->init() callback.  On SMP builds, adds every thread of the
+ * core that policy->cpu belongs to into policy->cpus, then sets the
+ * transition latency and initial frequency and attaches powernv_freqs
+ * to the policy.
+ */
+static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy)
+{
+       int base, i;
+
+#ifdef CONFIG_SMP
+       base = cpu_first_thread_sibling(policy->cpu);
+
+       for (i = base; i < base + threads_per_core; i++)
+               cpumask_set_cpu(i, policy->cpus);
+#endif
+       policy->cpuinfo.transition_latency = 25000;
+
+       /* Start out reporting the first table entry as current frequency */
+       policy->cur = powernv_freqs[0].frequency;
+
+       cpufreq_frequency_table_get_attr(powernv_freqs, policy->cpu);
+
+       return cpufreq_frequency_table_cpuinfo(policy, powernv_freqs);
+}
+
+/*
+ * cpufreq ->exit() callback: undoes the cpufreq_frequency_table_get_attr()
+ * done in powernv_cpufreq_cpu_init().  Always returns 0.
+ */
+static int powernv_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+{
+       cpufreq_frequency_table_put_attr(policy->cpu);
+
+       return 0;
+}
+
+/* cpufreq ->verify() callback: check @policy against powernv_freqs */
+static int powernv_cpufreq_verify(struct cpufreq_policy *policy)
+{
+       return cpufreq_frequency_table_verify(policy, powernv_freqs);
+}
+
+/*
+ * cpufreq ->target() callback.  Looks up the powernv_freqs entry that
+ * matches @target_freq under @relation and switches the policy's CPUs
+ * to the corresponding PState, bracketed by the PRECHANGE/POSTCHANGE
+ * notifications.
+ *
+ * Returns 0 on success, or the error from the table lookup if no
+ * suitable entry exists.
+ */
+static int powernv_cpufreq_target(struct cpufreq_policy *policy,
+                             unsigned int target_freq,
+                             unsigned int relation)
+{
+       int rc;
+       struct cpufreq_freqs freqs;
+       unsigned int new_index;
+
+       /* new_index is only valid when the lookup succeeds */
+       rc = cpufreq_frequency_table_target(policy, powernv_freqs, target_freq,
+                                           relation, &new_index);
+       if (rc)
+               return rc;
+
+       freqs.old = policy->cur;
+       freqs.new = powernv_freqs[new_index].frequency;
+       freqs.cpu = policy->cpu;
+
+       mutex_lock(&freq_switch_mutex);
+       cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
+
+       pr_debug("setting frequency for cpu %d to %d kHz index %d pstate %d\n",
+                policy->cpu,
+                powernv_freqs[new_index].frequency,
+                new_index,
+                powernv_pstate_ids[new_index]);
+
+       rc = powernv_set_freq(policy->cpus, new_index);
+
+       cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
+       mutex_unlock(&freq_switch_mutex);
+
+       return rc;
+}
+
+/* Driver description registered by powernv_cpufreq_init() */
+static struct cpufreq_driver powernv_cpufreq_driver = {
+       .name           = "powernv-cpufreq",
+       .flags          = CPUFREQ_CONST_LOOPS,
+       .init           = powernv_cpufreq_cpu_init,
+       .exit           = powernv_cpufreq_cpu_exit,
+       .verify         = powernv_cpufreq_verify,
+       .target         = powernv_cpufreq_target,
+       .attr           = powernv_cpu_freq_attr,
+};
+
+/*
+ * Module init: build the PState tables from the device tree and, if that
+ * worked, register the driver with the cpufreq core.  Returns 0 or the
+ * error from whichever step failed.
+ */
+static int __init powernv_cpufreq_init(void)
+{
+       int rc;
+
+       /* Discover pstates from device tree and init */
+       rc = init_powernv_pstates();
+       if (rc) {
+               pr_info("powernv-cpufreq disabled\n");
+               return rc;
+       }
+
+       return cpufreq_register_driver(&powernv_cpufreq_driver);
+}
+
+/* Module exit: unregister the driver from the cpufreq core */
+static void __exit powernv_cpufreq_exit(void)
+{
+       cpufreq_unregister_driver(&powernv_cpufreq_driver);
+}
+
+/* Module entry/exit hooks */
+module_init(powernv_cpufreq_init);
+module_exit(powernv_cpufreq_exit);
+
+/* Module metadata */
+MODULE_AUTHOR("Vaidyanathan Srinivasan <svaidy at linux.vnet.ibm.com>");
+MODULE_LICENSE("GPL");
-- 
1.8.3.1

_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Reply via email to