Re: [PATCH v9 15/15] OPTIONAL: cpufreq: dt: Register an Energy Model

2018-11-20 Thread Quentin Perret
Hi Viresh,

On Tuesday 20 Nov 2018 at 11:49:25 (+0530), Viresh Kumar wrote:
> On 19-11-18, 14:18, Quentin Perret wrote:
> >  static int cpufreq_init(struct cpufreq_policy *policy)
> >  {
> > +   struct em_data_callback em_cb = EM_DATA_CB(of_est_power);
> > struct cpufreq_frequency_table *freq_table;
> > struct opp_table *opp_table = NULL;
> > struct private_data *priv;
> > @@ -160,7 +203,7 @@ static int cpufreq_init(struct cpufreq_policy *policy)
> > unsigned int transition_latency;
> > bool fallback = false;
> > const char *name;
> > -   int ret;
> > +   int ret, nr_opp;
> >  
> > cpu_dev = get_cpu_device(policy->cpu);
> > if (!cpu_dev) {
> > @@ -237,6 +280,7 @@ static int cpufreq_init(struct cpufreq_policy *policy)
> > ret = -EPROBE_DEFER;
> > goto out_free_opp;
> > }
> > +   nr_opp = ret;
> >  
> > if (fallback) {
> > cpumask_setall(policy->cpus);
> > @@ -280,6 +324,8 @@ static int cpufreq_init(struct cpufreq_policy *policy)
> > policy->cpuinfo.transition_latency = transition_latency;
> > policy->dvfs_possible_from_any_cpu = true;
> >  
> > +   em_register_perf_domain(policy->cpus, nr_opp, _cb);
> > +
> > return 0;
> >  
> >  out_free_cpufreq_table:
> 
> I haven't gone deep into the series, but why don't we need something
> like em_unregister_perf_domain()? That can be used if the cpufreq
> driver goes away. Else loading/unloading/loading the cpufreq driver
> may register the perf-domain callback again.

Right, that's a good point. Registering the perf-domain multiple times
is harmless -- all but the first registration will be ignored. That
_should_ be documented somewhere in patch 03. I'll double check and add
the doc if that's not the case.

The overall idea so far has been to keep the EM framework as simple as
possible. We allocate the EM once and it stays in memory forever. That
makes it really easy for the scheduler (for instance) to manipulate
pointers to perf domains without having to worry about them being
unregistered. We could definitely do something smarter to
register/unregister the PDs dynamically using refcount or something,
but hopefully this is something we can do later, if need be.

Thanks,
Quentin


Re: [PATCH v9 15/15] OPTIONAL: cpufreq: dt: Register an Energy Model

2018-11-20 Thread Quentin Perret
Hi Viresh,

On Tuesday 20 Nov 2018 at 11:49:25 (+0530), Viresh Kumar wrote:
> On 19-11-18, 14:18, Quentin Perret wrote:
> >  static int cpufreq_init(struct cpufreq_policy *policy)
> >  {
> > +   struct em_data_callback em_cb = EM_DATA_CB(of_est_power);
> > struct cpufreq_frequency_table *freq_table;
> > struct opp_table *opp_table = NULL;
> > struct private_data *priv;
> > @@ -160,7 +203,7 @@ static int cpufreq_init(struct cpufreq_policy *policy)
> > unsigned int transition_latency;
> > bool fallback = false;
> > const char *name;
> > -   int ret;
> > +   int ret, nr_opp;
> >  
> > cpu_dev = get_cpu_device(policy->cpu);
> > if (!cpu_dev) {
> > @@ -237,6 +280,7 @@ static int cpufreq_init(struct cpufreq_policy *policy)
> > ret = -EPROBE_DEFER;
> > goto out_free_opp;
> > }
> > +   nr_opp = ret;
> >  
> > if (fallback) {
> > cpumask_setall(policy->cpus);
> > @@ -280,6 +324,8 @@ static int cpufreq_init(struct cpufreq_policy *policy)
> > policy->cpuinfo.transition_latency = transition_latency;
> > policy->dvfs_possible_from_any_cpu = true;
> >  
> > +   em_register_perf_domain(policy->cpus, nr_opp, _cb);
> > +
> > return 0;
> >  
> >  out_free_cpufreq_table:
> 
> I haven't gone deep into the series, but why don't we need something
> like em_unregister_perf_domain()? That can be used if the cpufreq
> driver goes away. Else loading/unloading/loading the cpufreq driver
> may register the perf-domain callback again.

Right, that's a good point. Registering the perf-domain multiple times
is harmless -- all but the first registration will be ignored. That
_should_ be documented somewhere in patch 03. I'll double check and add
the doc if that's not the case.

The overall idea so far has been to keep the EM framework as simple as
possible. We allocate the EM once and it stays in memory forever. That
makes it really easy for the scheduler (for instance) to manipulate
pointers to perf domains without having to worry about them being
unregistered. We could definitely do something smarter to
register/unregister the PDs dynamically using refcount or something,
but hopefully this is something we can do later, if need be.

Thanks,
Quentin


Re: [PATCH v9 15/15] OPTIONAL: cpufreq: dt: Register an Energy Model

2018-11-19 Thread Viresh Kumar
On 19-11-18, 14:18, Quentin Perret wrote:
>  static int cpufreq_init(struct cpufreq_policy *policy)
>  {
> + struct em_data_callback em_cb = EM_DATA_CB(of_est_power);
>   struct cpufreq_frequency_table *freq_table;
>   struct opp_table *opp_table = NULL;
>   struct private_data *priv;
> @@ -160,7 +203,7 @@ static int cpufreq_init(struct cpufreq_policy *policy)
>   unsigned int transition_latency;
>   bool fallback = false;
>   const char *name;
> - int ret;
> + int ret, nr_opp;
>  
>   cpu_dev = get_cpu_device(policy->cpu);
>   if (!cpu_dev) {
> @@ -237,6 +280,7 @@ static int cpufreq_init(struct cpufreq_policy *policy)
>   ret = -EPROBE_DEFER;
>   goto out_free_opp;
>   }
> + nr_opp = ret;
>  
>   if (fallback) {
>   cpumask_setall(policy->cpus);
> @@ -280,6 +324,8 @@ static int cpufreq_init(struct cpufreq_policy *policy)
>   policy->cpuinfo.transition_latency = transition_latency;
>   policy->dvfs_possible_from_any_cpu = true;
>  
> + em_register_perf_domain(policy->cpus, nr_opp, _cb);
> +
>   return 0;
>  
>  out_free_cpufreq_table:

I haven't gone deep into the series, but why don't we need something
like em_unregister_perf_domain()? That can be used if the cpufreq
driver goes away. Else loading/unloading/loading the cpufreq driver
may register the perf-domain callback again.

-- 
viresh


Re: [PATCH v9 15/15] OPTIONAL: cpufreq: dt: Register an Energy Model

2018-11-19 Thread Viresh Kumar
On 19-11-18, 14:18, Quentin Perret wrote:
>  static int cpufreq_init(struct cpufreq_policy *policy)
>  {
> + struct em_data_callback em_cb = EM_DATA_CB(of_est_power);
>   struct cpufreq_frequency_table *freq_table;
>   struct opp_table *opp_table = NULL;
>   struct private_data *priv;
> @@ -160,7 +203,7 @@ static int cpufreq_init(struct cpufreq_policy *policy)
>   unsigned int transition_latency;
>   bool fallback = false;
>   const char *name;
> - int ret;
> + int ret, nr_opp;
>  
>   cpu_dev = get_cpu_device(policy->cpu);
>   if (!cpu_dev) {
> @@ -237,6 +280,7 @@ static int cpufreq_init(struct cpufreq_policy *policy)
>   ret = -EPROBE_DEFER;
>   goto out_free_opp;
>   }
> + nr_opp = ret;
>  
>   if (fallback) {
>   cpumask_setall(policy->cpus);
> @@ -280,6 +324,8 @@ static int cpufreq_init(struct cpufreq_policy *policy)
>   policy->cpuinfo.transition_latency = transition_latency;
>   policy->dvfs_possible_from_any_cpu = true;
>  
> + em_register_perf_domain(policy->cpus, nr_opp, _cb);
> +
>   return 0;
>  
>  out_free_cpufreq_table:

I haven't gone deep into the series, but why don't we need something
like em_unregister_perf_domain()? That can be used if the cpufreq
driver goes away. Else loading/unloading/loading the cpufreq driver
may register the perf-domain callback again.

-- 
viresh


[PATCH v9 15/15] OPTIONAL: cpufreq: dt: Register an Energy Model

2018-11-19 Thread Quentin Perret
***
* This patch illustrates the usage of the newly introduced Energy *
* Model framework and isn't supposed to be merged as-is.  *
***

The Energy Model framework provides an API to register the active power
of CPUs. Call this API from the cpufreq-dt driver with an estimation
of the power as P = C * V^2 * f with C, V, and f respectively the
capacitance of the CPU and the voltage and frequency of the OPP.

The CPU capacitance is read from the "dynamic-power-coefficient" DT
binding (originally introduced for thermal/IPA), and the voltage and
frequency values from PM_OPP.

Cc: "Rafael J. Wysocki" 
Cc: Viresh Kumar 
Signed-off-by: Quentin Perret 
---
 drivers/cpufreq/cpufreq-dt.c | 48 +++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index e58bfcb1169e..4cfef5554d86 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -150,8 +151,50 @@ static int resources_available(void)
return 0;
 }
 
+static int __maybe_unused of_est_power(unsigned long *mW, unsigned long *KHz,
+  int cpu)
+{
+   unsigned long mV, Hz, MHz;
+   struct device *cpu_dev;
+   struct dev_pm_opp *opp;
+   struct device_node *np;
+   u32 cap;
+   u64 tmp;
+
+   cpu_dev = get_cpu_device(cpu);
+   if (!cpu_dev)
+   return -ENODEV;
+
+   np = of_node_get(cpu_dev->of_node);
+   if (!np)
+   return -EINVAL;
+
+   if (of_property_read_u32(np, "dynamic-power-coefficient", ))
+   return -EINVAL;
+
+   Hz = *KHz * 1000;
+   opp = dev_pm_opp_find_freq_ceil(cpu_dev, );
+   if (IS_ERR(opp))
+   return -EINVAL;
+
+   mV = dev_pm_opp_get_voltage(opp) / 1000;
+   dev_pm_opp_put(opp);
+   if (!mV)
+   return -EINVAL;
+
+   MHz = Hz / 100;
+   tmp = (u64)cap * mV * mV * MHz;
+   do_div(tmp, 10);
+
+   *mW = (unsigned long)tmp;
+   *KHz = Hz / 1000;
+
+   return 0;
+}
+
 static int cpufreq_init(struct cpufreq_policy *policy)
 {
+   struct em_data_callback em_cb = EM_DATA_CB(of_est_power);
struct cpufreq_frequency_table *freq_table;
struct opp_table *opp_table = NULL;
struct private_data *priv;
@@ -160,7 +203,7 @@ static int cpufreq_init(struct cpufreq_policy *policy)
unsigned int transition_latency;
bool fallback = false;
const char *name;
-   int ret;
+   int ret, nr_opp;
 
cpu_dev = get_cpu_device(policy->cpu);
if (!cpu_dev) {
@@ -237,6 +280,7 @@ static int cpufreq_init(struct cpufreq_policy *policy)
ret = -EPROBE_DEFER;
goto out_free_opp;
}
+   nr_opp = ret;
 
if (fallback) {
cpumask_setall(policy->cpus);
@@ -280,6 +324,8 @@ static int cpufreq_init(struct cpufreq_policy *policy)
policy->cpuinfo.transition_latency = transition_latency;
policy->dvfs_possible_from_any_cpu = true;
 
+   em_register_perf_domain(policy->cpus, nr_opp, _cb);
+
return 0;
 
 out_free_cpufreq_table:
-- 
2.19.1



[PATCH v9 15/15] OPTIONAL: cpufreq: dt: Register an Energy Model

2018-11-19 Thread Quentin Perret
***
* This patch illustrates the usage of the newly introduced Energy *
* Model framework and isn't supposed to be merged as-is.  *
***

The Energy Model framework provides an API to register the active power
of CPUs. Call this API from the cpufreq-dt driver with an estimation
of the power as P = C * V^2 * f with C, V, and f respectively the
capacitance of the CPU and the voltage and frequency of the OPP.

The CPU capacitance is read from the "dynamic-power-coefficient" DT
binding (originally introduced for thermal/IPA), and the voltage and
frequency values from PM_OPP.

Cc: "Rafael J. Wysocki" 
Cc: Viresh Kumar 
Signed-off-by: Quentin Perret 
---
 drivers/cpufreq/cpufreq-dt.c | 48 +++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index e58bfcb1169e..4cfef5554d86 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -150,8 +151,50 @@ static int resources_available(void)
return 0;
 }
 
+static int __maybe_unused of_est_power(unsigned long *mW, unsigned long *KHz,
+  int cpu)
+{
+   unsigned long mV, Hz, MHz;
+   struct device *cpu_dev;
+   struct dev_pm_opp *opp;
+   struct device_node *np;
+   u32 cap;
+   u64 tmp;
+
+   cpu_dev = get_cpu_device(cpu);
+   if (!cpu_dev)
+   return -ENODEV;
+
+   np = of_node_get(cpu_dev->of_node);
+   if (!np)
+   return -EINVAL;
+
+   if (of_property_read_u32(np, "dynamic-power-coefficient", ))
+   return -EINVAL;
+
+   Hz = *KHz * 1000;
+   opp = dev_pm_opp_find_freq_ceil(cpu_dev, );
+   if (IS_ERR(opp))
+   return -EINVAL;
+
+   mV = dev_pm_opp_get_voltage(opp) / 1000;
+   dev_pm_opp_put(opp);
+   if (!mV)
+   return -EINVAL;
+
+   MHz = Hz / 100;
+   tmp = (u64)cap * mV * mV * MHz;
+   do_div(tmp, 10);
+
+   *mW = (unsigned long)tmp;
+   *KHz = Hz / 1000;
+
+   return 0;
+}
+
 static int cpufreq_init(struct cpufreq_policy *policy)
 {
+   struct em_data_callback em_cb = EM_DATA_CB(of_est_power);
struct cpufreq_frequency_table *freq_table;
struct opp_table *opp_table = NULL;
struct private_data *priv;
@@ -160,7 +203,7 @@ static int cpufreq_init(struct cpufreq_policy *policy)
unsigned int transition_latency;
bool fallback = false;
const char *name;
-   int ret;
+   int ret, nr_opp;
 
cpu_dev = get_cpu_device(policy->cpu);
if (!cpu_dev) {
@@ -237,6 +280,7 @@ static int cpufreq_init(struct cpufreq_policy *policy)
ret = -EPROBE_DEFER;
goto out_free_opp;
}
+   nr_opp = ret;
 
if (fallback) {
cpumask_setall(policy->cpus);
@@ -280,6 +324,8 @@ static int cpufreq_init(struct cpufreq_policy *policy)
policy->cpuinfo.transition_latency = transition_latency;
policy->dvfs_possible_from_any_cpu = true;
 
+   em_register_perf_domain(policy->cpus, nr_opp, _cb);
+
return 0;
 
 out_free_cpufreq_table:
-- 
2.19.1