Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm

2015-08-31 Thread Peter Zijlstra
On Mon, Aug 31, 2015 at 03:25:35PM +0200, Peter Zijlstra wrote:
> On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote:
> > +static ssize_t show_power_acc(struct device *dev,
> > + struct device_attribute *attr, char *buf)
> > +{
> > +   int cpu, cu, cu_num, cores_per_cu;
> > +   u64 curr_cu_acc_power[MAX_CUS],
> > +   curr_ptsc[MAX_CUS], jdelta[MAX_CUS];
> > +   u64 tdelta, avg_acc;
> > +   struct fam15h_power_data *data = dev_get_drvdata(dev);
> > +
> > +   cores_per_cu = amd_get_cores_per_cu();
> > +   cu_num = boot_cpu_data.x86_max_cores / cores_per_cu;
> > +
> > +   for (cpu = 0, avg_acc = 0; cpu < cu_num * cores_per_cu; cpu += 
> > cores_per_cu) {
> > +   cu = cpu / cores_per_cu;
> > +   if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, _ptsc[cu])) {
> > +   pr_err("Failed to read PTSC counter MSR on core%d\n",
> > +  cpu);
> > +   return 0;
> > +   }
> > +
> > +   if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR,
> > +  _cu_acc_power[cu])) {
> > +   pr_err("Failed to read compute unit power accumulator 
> > MSR on core%d\n",
> > +  cpu);
> > +   return 0;
> > +   }
> > +
> > +   if (curr_cu_acc_power[cu] < data->cu_acc_power[cu]) {
> > +   jdelta[cu] = data->max_cu_acc_power + 
> > curr_cu_acc_power[cu];
> > +   jdelta[cu] -= data->cu_acc_power[cu];
> > +   } else {
> > +   jdelta[cu] = curr_cu_acc_power[cu] - 
> > data->cu_acc_power[cu];
> > +   }
> > +   tdelta = curr_ptsc[cu] - data->cpu_sw_pwr_ptsc[cu];
> > +   jdelta[cu] *= data->cpu_pwr_sample_ratio * 1000;
> > +   do_div(jdelta[cu], tdelta);
> > +
> > +   mutex_lock(>acc_pwr_mutex);
> > +   data->cu_acc_power[cu] = curr_cu_acc_power[cu];
> > +   data->cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu];
> > +   mutex_unlock(>acc_pwr_mutex);
> > +
> > +   /* the unit is microWatt */
> > +   avg_acc += jdelta[cu];
> > +   }
> > +
> > +   return sprintf(buf, "%u\n", (unsigned int) avg_acc);
> > +}
> > +static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL);
> 
> This is a world readable file that sprays IPIs across the entire system,
> how is that a good idea?

FWIW this driver is also hotplug challenged. Not to mention it relies on
cpu number layout in unhealthy ways.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm

2015-08-31 Thread Peter Zijlstra
On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote:
> +static ssize_t show_power_acc(struct device *dev,
> +   struct device_attribute *attr, char *buf)
> +{
> + int cpu, cu, cu_num, cores_per_cu;
> + u64 curr_cu_acc_power[MAX_CUS],
> + curr_ptsc[MAX_CUS], jdelta[MAX_CUS];
> + u64 tdelta, avg_acc;
> + struct fam15h_power_data *data = dev_get_drvdata(dev);
> +
> + cores_per_cu = amd_get_cores_per_cu();
> + cu_num = boot_cpu_data.x86_max_cores / cores_per_cu;
> +
> + for (cpu = 0, avg_acc = 0; cpu < cu_num * cores_per_cu; cpu += 
> cores_per_cu) {
> + cu = cpu / cores_per_cu;
> + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, _ptsc[cu])) {
> + pr_err("Failed to read PTSC counter MSR on core%d\n",
> +cpu);
> + return 0;
> + }
> +
> + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR,
> +_cu_acc_power[cu])) {
> + pr_err("Failed to read compute unit power accumulator 
> MSR on core%d\n",
> +cpu);
> + return 0;
> + }
> +
> + if (curr_cu_acc_power[cu] < data->cu_acc_power[cu]) {
> + jdelta[cu] = data->max_cu_acc_power + 
> curr_cu_acc_power[cu];
> + jdelta[cu] -= data->cu_acc_power[cu];
> + } else {
> + jdelta[cu] = curr_cu_acc_power[cu] - 
> data->cu_acc_power[cu];
> + }
> + tdelta = curr_ptsc[cu] - data->cpu_sw_pwr_ptsc[cu];
> + jdelta[cu] *= data->cpu_pwr_sample_ratio * 1000;
> + do_div(jdelta[cu], tdelta);
> +
> + mutex_lock(>acc_pwr_mutex);
> + data->cu_acc_power[cu] = curr_cu_acc_power[cu];
> + data->cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu];
> + mutex_unlock(>acc_pwr_mutex);
> +
> + /* the unit is microWatt */
> + avg_acc += jdelta[cu];
> + }
> +
> + return sprintf(buf, "%u\n", (unsigned int) avg_acc);
> +}
> +static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL);

This is a world readable file that sprays IPIs across the entire system,
how is that a good idea?
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm

2015-08-31 Thread Huang Rui
On Sun, Aug 30, 2015 at 09:30:28PM -0700, Guenter Roeck wrote:
> On 08/30/2015 09:16 PM, Huang Rui wrote:
> >On Fri, Aug 28, 2015 at 07:05:13AM -0700, Guenter Roeck wrote:
> >>On 08/28/2015 03:45 AM, Huang Rui wrote:
> >>>On Thu, Aug 27, 2015 at 10:30:43AM -0700, Guenter Roeck wrote:
> On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote:
> >This patch introduces an algorithm that computes the average power by
> >reading a delta value of “core power accumulator” register during
> >measurement interval, and then dividing delta value by the length of
> >the time interval.
> >
> >User is able to use power1_acc entry to measure the processor power
> >consumption and power1_acc just needs to be read twice with an needed
> >interval in-between.
> >
> >A simple example:
> >
> >$ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc
> >$ sleep 1s
> >$ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc
> >
> >The result is current average processor power consumption in 1
> >seconds. The unit of the result is uWatt.
> >
> >Signed-off-by: Huang Rui 
> >---
> >  drivers/hwmon/fam15h_power.c | 62 
> > 
> >  1 file changed, 62 insertions(+)
> >
> >diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
> >index d529e4b..3bab797 100644
> >--- a/drivers/hwmon/fam15h_power.c
> >+++ b/drivers/hwmon/fam15h_power.c
> >@@ -60,6 +60,7 @@ struct fam15h_power_data {
> > u64 cu_acc_power[MAX_CUS];
> > /* performance timestamp counter */
> > u64 cpu_sw_pwr_ptsc[MAX_CUS];
> >+struct mutex acc_pwr_mutex;
> >  };
> >
> >  static ssize_t show_power(struct device *dev,
> >@@ -121,17 +122,74 @@ static DEVICE_ATTR(power1_crit, S_IRUGO, 
> >show_power_crit, NULL);
> >  static struct attribute_group fam15h_power_group;
> >  __ATTRIBUTE_GROUPS(fam15h_power);
> >
> >+static ssize_t show_power_acc(struct device *dev,
> >+  struct device_attribute *attr, char *buf)
> >+{
> >+int cpu, cu, cu_num, cores_per_cu;
> >+u64 curr_cu_acc_power[MAX_CUS],
> >+curr_ptsc[MAX_CUS], jdelta[MAX_CUS];
> >+u64 tdelta, avg_acc;
> >+struct fam15h_power_data *data = dev_get_drvdata(dev);
> >+
> >+cores_per_cu = amd_get_cores_per_cu();
> >+cu_num = boot_cpu_data.x86_max_cores / cores_per_cu;
> >+
> >+for (cpu = 0, avg_acc = 0; cpu < cu_num * cores_per_cu; cpu += 
> >cores_per_cu) {
> >+cu = cpu / cores_per_cu;
> >+if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, 
> >_ptsc[cu])) {
> >+pr_err("Failed to read PTSC counter MSR on 
> >core%d\n",
> >+   cpu);
> >+return 0;
> >+}
> >+
> >+if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR,
> >+   _cu_acc_power[cu])) {
> >+pr_err("Failed to read compute unit power 
> >accumulator MSR on core%d\n",
> >+   cpu);
> >+return 0;
> >+}
> >+
> >+if (curr_cu_acc_power[cu] < data->cu_acc_power[cu]) {
> >+jdelta[cu] = data->max_cu_acc_power + 
> >curr_cu_acc_power[cu];
> >+jdelta[cu] -= data->cu_acc_power[cu];
> >+} else {
> >+jdelta[cu] = curr_cu_acc_power[cu] - 
> >data->cu_acc_power[cu];
> >+}
> >+tdelta = curr_ptsc[cu] - data->cpu_sw_pwr_ptsc[cu];
> >+jdelta[cu] *= data->cpu_pwr_sample_ratio * 1000;
> >+do_div(jdelta[cu], tdelta);
> >+
> >+mutex_lock(>acc_pwr_mutex);
> >+data->cu_acc_power[cu] = curr_cu_acc_power[cu];
> >+data->cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu];
> >+mutex_unlock(>acc_pwr_mutex);
> >+
> >+/* the unit is microWatt */
> >+avg_acc += jdelta[cu];
> >+}
> >+
> >+return sprintf(buf, "%u\n", (unsigned int) avg_acc);
> >+}
> >+static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL);
> 
> I am not really a friend of introducing a non-standard attribute.
> Does the energy attribute not work here ?
> 
> >>>
> >>>You're right. Non-standard attribute might not be good. Could you
> >>>please give me some hints if I use "energy" instead?
> >>>
> >>1 Joule = 1 Watt-second.
> >>
> >>Something else, though - did you make sure that your code doesn't overflow ?
> >>Even 

Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm

2015-08-31 Thread Peter Zijlstra
On Mon, Aug 31, 2015 at 03:25:35PM +0200, Peter Zijlstra wrote:
> On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote:
> > +static ssize_t show_power_acc(struct device *dev,
> > + struct device_attribute *attr, char *buf)
> > +{
> > +   int cpu, cu, cu_num, cores_per_cu;
> > +   u64 curr_cu_acc_power[MAX_CUS],
> > +   curr_ptsc[MAX_CUS], jdelta[MAX_CUS];
> > +   u64 tdelta, avg_acc;
> > +   struct fam15h_power_data *data = dev_get_drvdata(dev);
> > +
> > +   cores_per_cu = amd_get_cores_per_cu();
> > +   cu_num = boot_cpu_data.x86_max_cores / cores_per_cu;
> > +
> > +   for (cpu = 0, avg_acc = 0; cpu < cu_num * cores_per_cu; cpu += 
> > cores_per_cu) {
> > +   cu = cpu / cores_per_cu;
> > +   if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, _ptsc[cu])) {
> > +   pr_err("Failed to read PTSC counter MSR on core%d\n",
> > +  cpu);
> > +   return 0;
> > +   }
> > +
> > +   if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR,
> > +  _cu_acc_power[cu])) {
> > +   pr_err("Failed to read compute unit power accumulator 
> > MSR on core%d\n",
> > +  cpu);
> > +   return 0;
> > +   }
> > +
> > +   if (curr_cu_acc_power[cu] < data->cu_acc_power[cu]) {
> > +   jdelta[cu] = data->max_cu_acc_power + 
> > curr_cu_acc_power[cu];
> > +   jdelta[cu] -= data->cu_acc_power[cu];
> > +   } else {
> > +   jdelta[cu] = curr_cu_acc_power[cu] - 
> > data->cu_acc_power[cu];
> > +   }
> > +   tdelta = curr_ptsc[cu] - data->cpu_sw_pwr_ptsc[cu];
> > +   jdelta[cu] *= data->cpu_pwr_sample_ratio * 1000;
> > +   do_div(jdelta[cu], tdelta);
> > +
> > +   mutex_lock(>acc_pwr_mutex);
> > +   data->cu_acc_power[cu] = curr_cu_acc_power[cu];
> > +   data->cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu];
> > +   mutex_unlock(>acc_pwr_mutex);
> > +
> > +   /* the unit is microWatt */
> > +   avg_acc += jdelta[cu];
> > +   }
> > +
> > +   return sprintf(buf, "%u\n", (unsigned int) avg_acc);
> > +}
> > +static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL);
> 
> This is a world readable file that sprays IPIs across the entire system,
> how is that a good idea?

FWIW this driver is also hotplug challenged. Not to mention it relies on
cpu number layout in unhealthy ways.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm

2015-08-31 Thread Peter Zijlstra
On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote:
> +static ssize_t show_power_acc(struct device *dev,
> +   struct device_attribute *attr, char *buf)
> +{
> + int cpu, cu, cu_num, cores_per_cu;
> + u64 curr_cu_acc_power[MAX_CUS],
> + curr_ptsc[MAX_CUS], jdelta[MAX_CUS];
> + u64 tdelta, avg_acc;
> + struct fam15h_power_data *data = dev_get_drvdata(dev);
> +
> + cores_per_cu = amd_get_cores_per_cu();
> + cu_num = boot_cpu_data.x86_max_cores / cores_per_cu;
> +
> + for (cpu = 0, avg_acc = 0; cpu < cu_num * cores_per_cu; cpu += 
> cores_per_cu) {
> + cu = cpu / cores_per_cu;
> + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, _ptsc[cu])) {
> + pr_err("Failed to read PTSC counter MSR on core%d\n",
> +cpu);
> + return 0;
> + }
> +
> + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR,
> +_cu_acc_power[cu])) {
> + pr_err("Failed to read compute unit power accumulator 
> MSR on core%d\n",
> +cpu);
> + return 0;
> + }
> +
> + if (curr_cu_acc_power[cu] < data->cu_acc_power[cu]) {
> + jdelta[cu] = data->max_cu_acc_power + 
> curr_cu_acc_power[cu];
> + jdelta[cu] -= data->cu_acc_power[cu];
> + } else {
> + jdelta[cu] = curr_cu_acc_power[cu] - 
> data->cu_acc_power[cu];
> + }
> + tdelta = curr_ptsc[cu] - data->cpu_sw_pwr_ptsc[cu];
> + jdelta[cu] *= data->cpu_pwr_sample_ratio * 1000;
> + do_div(jdelta[cu], tdelta);
> +
> + mutex_lock(>acc_pwr_mutex);
> + data->cu_acc_power[cu] = curr_cu_acc_power[cu];
> + data->cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu];
> + mutex_unlock(>acc_pwr_mutex);
> +
> + /* the unit is microWatt */
> + avg_acc += jdelta[cu];
> + }
> +
> + return sprintf(buf, "%u\n", (unsigned int) avg_acc);
> +}
> +static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL);

This is a world readable file that sprays IPIs across the entire system,
how is that a good idea?
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm

2015-08-31 Thread Huang Rui
On Sun, Aug 30, 2015 at 09:30:28PM -0700, Guenter Roeck wrote:
> On 08/30/2015 09:16 PM, Huang Rui wrote:
> >On Fri, Aug 28, 2015 at 07:05:13AM -0700, Guenter Roeck wrote:
> >>On 08/28/2015 03:45 AM, Huang Rui wrote:
> >>>On Thu, Aug 27, 2015 at 10:30:43AM -0700, Guenter Roeck wrote:
> On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote:
> >This patch introduces an algorithm that computes the average power by
> >reading a delta value of “core power accumulator” register during
> >measurement interval, and then dividing delta value by the length of
> >the time interval.
> >
> >User is able to use power1_acc entry to measure the processor power
> >consumption and power1_acc just needs to be read twice with an needed
> >interval in-between.
> >
> >A simple example:
> >
> >$ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc
> >$ sleep 1s
> >$ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc
> >
> >The result is current average processor power consumption in 1
> >seconds. The unit of the result is uWatt.
> >
> >Signed-off-by: Huang Rui 
> >---
> >  drivers/hwmon/fam15h_power.c | 62 
> > 
> >  1 file changed, 62 insertions(+)
> >
> >diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
> >index d529e4b..3bab797 100644
> >--- a/drivers/hwmon/fam15h_power.c
> >+++ b/drivers/hwmon/fam15h_power.c
> >@@ -60,6 +60,7 @@ struct fam15h_power_data {
> > u64 cu_acc_power[MAX_CUS];
> > /* performance timestamp counter */
> > u64 cpu_sw_pwr_ptsc[MAX_CUS];
> >+struct mutex acc_pwr_mutex;
> >  };
> >
> >  static ssize_t show_power(struct device *dev,
> >@@ -121,17 +122,74 @@ static DEVICE_ATTR(power1_crit, S_IRUGO, 
> >show_power_crit, NULL);
> >  static struct attribute_group fam15h_power_group;
> >  __ATTRIBUTE_GROUPS(fam15h_power);
> >
> >+static ssize_t show_power_acc(struct device *dev,
> >+  struct device_attribute *attr, char *buf)
> >+{
> >+int cpu, cu, cu_num, cores_per_cu;
> >+u64 curr_cu_acc_power[MAX_CUS],
> >+curr_ptsc[MAX_CUS], jdelta[MAX_CUS];
> >+u64 tdelta, avg_acc;
> >+struct fam15h_power_data *data = dev_get_drvdata(dev);
> >+
> >+cores_per_cu = amd_get_cores_per_cu();
> >+cu_num = boot_cpu_data.x86_max_cores / cores_per_cu;
> >+
> >+for (cpu = 0, avg_acc = 0; cpu < cu_num * cores_per_cu; cpu += 
> >cores_per_cu) {
> >+cu = cpu / cores_per_cu;
> >+if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, 
> >_ptsc[cu])) {
> >+pr_err("Failed to read PTSC counter MSR on 
> >core%d\n",
> >+   cpu);
> >+return 0;
> >+}
> >+
> >+if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR,
> >+   _cu_acc_power[cu])) {
> >+pr_err("Failed to read compute unit power 
> >accumulator MSR on core%d\n",
> >+   cpu);
> >+return 0;
> >+}
> >+
> >+if (curr_cu_acc_power[cu] < data->cu_acc_power[cu]) {
> >+jdelta[cu] = data->max_cu_acc_power + 
> >curr_cu_acc_power[cu];
> >+jdelta[cu] -= data->cu_acc_power[cu];
> >+} else {
> >+jdelta[cu] = curr_cu_acc_power[cu] - 
> >data->cu_acc_power[cu];
> >+}
> >+tdelta = curr_ptsc[cu] - data->cpu_sw_pwr_ptsc[cu];
> >+jdelta[cu] *= data->cpu_pwr_sample_ratio * 1000;
> >+do_div(jdelta[cu], tdelta);
> >+
> >+mutex_lock(>acc_pwr_mutex);
> >+data->cu_acc_power[cu] = curr_cu_acc_power[cu];
> >+data->cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu];
> >+mutex_unlock(>acc_pwr_mutex);
> >+
> >+/* the unit is microWatt */
> >+avg_acc += jdelta[cu];
> >+}
> >+
> >+return sprintf(buf, "%u\n", (unsigned int) avg_acc);
> >+}
> >+static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL);
> 
> I am not really a friend of introducing a non-standard attribute.
> Does the energy attribute not work here ?
> 
> >>>
> >>>You're right. Non-standard attribute might not be good. Could you
> >>>please give me some hints if I use "energy" instead?
> >>>
> >>1 Joule = 1 Watt-second.
> >>
> >>Something else, though - did you make sure that your code doesn't 

Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm

2015-08-30 Thread Guenter Roeck

On 08/30/2015 09:16 PM, Huang Rui wrote:

On Fri, Aug 28, 2015 at 07:05:13AM -0700, Guenter Roeck wrote:

On 08/28/2015 03:45 AM, Huang Rui wrote:

On Thu, Aug 27, 2015 at 10:30:43AM -0700, Guenter Roeck wrote:

On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote:

This patch introduces an algorithm that computes the average power by
reading a delta value of “core power accumulator” register during
measurement interval, and then dividing delta value by the length of
the time interval.

User is able to use power1_acc entry to measure the processor power
consumption and power1_acc just needs to be read twice with an needed
interval in-between.

A simple example:

$ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc
$ sleep 1s
$ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc

The result is current average processor power consumption in 1
seconds. The unit of the result is uWatt.

Signed-off-by: Huang Rui 
---
  drivers/hwmon/fam15h_power.c | 62 
  1 file changed, 62 insertions(+)

diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
index d529e4b..3bab797 100644
--- a/drivers/hwmon/fam15h_power.c
+++ b/drivers/hwmon/fam15h_power.c
@@ -60,6 +60,7 @@ struct fam15h_power_data {
u64 cu_acc_power[MAX_CUS];
/* performance timestamp counter */
u64 cpu_sw_pwr_ptsc[MAX_CUS];
+   struct mutex acc_pwr_mutex;
  };

  static ssize_t show_power(struct device *dev,
@@ -121,17 +122,74 @@ static DEVICE_ATTR(power1_crit, S_IRUGO, show_power_crit, 
NULL);
  static struct attribute_group fam15h_power_group;
  __ATTRIBUTE_GROUPS(fam15h_power);

+static ssize_t show_power_acc(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+   int cpu, cu, cu_num, cores_per_cu;
+   u64 curr_cu_acc_power[MAX_CUS],
+   curr_ptsc[MAX_CUS], jdelta[MAX_CUS];
+   u64 tdelta, avg_acc;
+   struct fam15h_power_data *data = dev_get_drvdata(dev);
+
+   cores_per_cu = amd_get_cores_per_cu();
+   cu_num = boot_cpu_data.x86_max_cores / cores_per_cu;
+
+   for (cpu = 0, avg_acc = 0; cpu < cu_num * cores_per_cu; cpu += 
cores_per_cu) {
+   cu = cpu / cores_per_cu;
+   if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, _ptsc[cu])) {
+   pr_err("Failed to read PTSC counter MSR on core%d\n",
+  cpu);
+   return 0;
+   }
+
+   if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR,
+  _cu_acc_power[cu])) {
+   pr_err("Failed to read compute unit power accumulator MSR on 
core%d\n",
+  cpu);
+   return 0;
+   }
+
+   if (curr_cu_acc_power[cu] < data->cu_acc_power[cu]) {
+   jdelta[cu] = data->max_cu_acc_power + 
curr_cu_acc_power[cu];
+   jdelta[cu] -= data->cu_acc_power[cu];
+   } else {
+   jdelta[cu] = curr_cu_acc_power[cu] - 
data->cu_acc_power[cu];
+   }
+   tdelta = curr_ptsc[cu] - data->cpu_sw_pwr_ptsc[cu];
+   jdelta[cu] *= data->cpu_pwr_sample_ratio * 1000;
+   do_div(jdelta[cu], tdelta);
+
+   mutex_lock(>acc_pwr_mutex);
+   data->cu_acc_power[cu] = curr_cu_acc_power[cu];
+   data->cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu];
+   mutex_unlock(>acc_pwr_mutex);
+
+   /* the unit is microWatt */
+   avg_acc += jdelta[cu];
+   }
+
+   return sprintf(buf, "%u\n", (unsigned int) avg_acc);
+}
+static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL);


I am not really a friend of introducing a non-standard attribute.
Does the energy attribute not work here ?



You're right. Non-standard attribute might not be good. Could you
please give me some hints if I use "energy" instead?


1 Joule = 1 Watt-second.

Something else, though - did you make sure that your code doesn't overflow ?
Even though you calculate the average in an u64, you display it as unsigned.



Thanks to your reminder. It should not be overflow. The maximum power
consumption of processor (AMD CZ and future 15h) is about 15 Watts =
15,000,000 uWatts = 0xE4E1C0 uWatts, the size is 24 < 32 < 64 bits.

Actually, the unit of jdelta is not Joule. Because the tdelta is the
loops (cycles) that PTSC counter (the freqency is about 100 MHz)
counts not seconds.

So avg_acc is the average power consumption not the accumulated energy.



Would power1_average then be better suitable for the attribute ?
There is also power1_average_interval which could be used to make
the interval configurable.

Thanks,
Guenter

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  

Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm

2015-08-30 Thread Huang Rui
On Fri, Aug 28, 2015 at 07:05:13AM -0700, Guenter Roeck wrote:
> On 08/28/2015 03:45 AM, Huang Rui wrote:
> >On Thu, Aug 27, 2015 at 10:30:43AM -0700, Guenter Roeck wrote:
> >>On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote:
> >>>This patch introduces an algorithm that computes the average power by
> >>>reading a delta value of “core power accumulator” register during
> >>>measurement interval, and then dividing delta value by the length of
> >>>the time interval.
> >>>
> >>>User is able to use power1_acc entry to measure the processor power
> >>>consumption and power1_acc just needs to be read twice with an needed
> >>>interval in-between.
> >>>
> >>>A simple example:
> >>>
> >>>$ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc
> >>>$ sleep 1s
> >>>$ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc
> >>>
> >>>The result is current average processor power consumption in 1
> >>>seconds. The unit of the result is uWatt.
> >>>
> >>>Signed-off-by: Huang Rui 
> >>>---
> >>>  drivers/hwmon/fam15h_power.c | 62 
> >>> 
> >>>  1 file changed, 62 insertions(+)
> >>>
> >>>diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
> >>>index d529e4b..3bab797 100644
> >>>--- a/drivers/hwmon/fam15h_power.c
> >>>+++ b/drivers/hwmon/fam15h_power.c
> >>>@@ -60,6 +60,7 @@ struct fam15h_power_data {
> >>>   u64 cu_acc_power[MAX_CUS];
> >>>   /* performance timestamp counter */
> >>>   u64 cpu_sw_pwr_ptsc[MAX_CUS];
> >>>+  struct mutex acc_pwr_mutex;
> >>>  };
> >>>
> >>>  static ssize_t show_power(struct device *dev,
> >>>@@ -121,17 +122,74 @@ static DEVICE_ATTR(power1_crit, S_IRUGO, 
> >>>show_power_crit, NULL);
> >>>  static struct attribute_group fam15h_power_group;
> >>>  __ATTRIBUTE_GROUPS(fam15h_power);
> >>>
> >>>+static ssize_t show_power_acc(struct device *dev,
> >>>+struct device_attribute *attr, char *buf)
> >>>+{
> >>>+  int cpu, cu, cu_num, cores_per_cu;
> >>>+  u64 curr_cu_acc_power[MAX_CUS],
> >>>+  curr_ptsc[MAX_CUS], jdelta[MAX_CUS];
> >>>+  u64 tdelta, avg_acc;
> >>>+  struct fam15h_power_data *data = dev_get_drvdata(dev);
> >>>+
> >>>+  cores_per_cu = amd_get_cores_per_cu();
> >>>+  cu_num = boot_cpu_data.x86_max_cores / cores_per_cu;
> >>>+
> >>>+  for (cpu = 0, avg_acc = 0; cpu < cu_num * cores_per_cu; cpu += 
> >>>cores_per_cu) {
> >>>+  cu = cpu / cores_per_cu;
> >>>+  if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, _ptsc[cu])) {
> >>>+  pr_err("Failed to read PTSC counter MSR on core%d\n",
> >>>+ cpu);
> >>>+  return 0;
> >>>+  }
> >>>+
> >>>+  if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR,
> >>>+ _cu_acc_power[cu])) {
> >>>+  pr_err("Failed to read compute unit power accumulator 
> >>>MSR on core%d\n",
> >>>+ cpu);
> >>>+  return 0;
> >>>+  }
> >>>+
> >>>+  if (curr_cu_acc_power[cu] < data->cu_acc_power[cu]) {
> >>>+  jdelta[cu] = data->max_cu_acc_power + 
> >>>curr_cu_acc_power[cu];
> >>>+  jdelta[cu] -= data->cu_acc_power[cu];
> >>>+  } else {
> >>>+  jdelta[cu] = curr_cu_acc_power[cu] - 
> >>>data->cu_acc_power[cu];
> >>>+  }
> >>>+  tdelta = curr_ptsc[cu] - data->cpu_sw_pwr_ptsc[cu];
> >>>+  jdelta[cu] *= data->cpu_pwr_sample_ratio * 1000;
> >>>+  do_div(jdelta[cu], tdelta);
> >>>+
> >>>+  mutex_lock(>acc_pwr_mutex);
> >>>+  data->cu_acc_power[cu] = curr_cu_acc_power[cu];
> >>>+  data->cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu];
> >>>+  mutex_unlock(>acc_pwr_mutex);
> >>>+
> >>>+  /* the unit is microWatt */
> >>>+  avg_acc += jdelta[cu];
> >>>+  }
> >>>+
> >>>+  return sprintf(buf, "%u\n", (unsigned int) avg_acc);
> >>>+}
> >>>+static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL);
> >>
> >>I am not really a friend of introducing a non-standard attribute.
> >>Does the energy attribute not work here ?
> >>
> >
> >You're right. Non-standard attribute might not be good. Could you
> >please give me some hints if I use "energy" instead?
> >
> 1 Joule = 1 Watt-second.
> 
> Something else, though - did you make sure that your code doesn't overflow ?
> Even though you calculate the average in an u64, you display it as unsigned.
> 

Thanks to your reminder. It should not be overflow. The maximum power
consumption of processor (AMD CZ and future 15h) is about 15 Watts =
15,000,000 uWatts = 0xE4E1C0 uWatts, the size is 24 < 32 < 64 bits.

Actually, the unit of jdelta is not Joule. Because the tdelta is the
loops (cycles) that PTSC counter (the freqency is about 100 MHz)
counts not seconds.

So avg_acc is the average power consumption not the accumulated energy.

> 100w * 10,000s = 1,000,000ws = 1,000,000,000,000 micro-watt-seconds, which is
> a 

Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm

2015-08-30 Thread Huang Rui
On Fri, Aug 28, 2015 at 07:05:13AM -0700, Guenter Roeck wrote:
 On 08/28/2015 03:45 AM, Huang Rui wrote:
 On Thu, Aug 27, 2015 at 10:30:43AM -0700, Guenter Roeck wrote:
 On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote:
 This patch introduces an algorithm that computes the average power by
 reading a delta value of “core power accumulator” register during
 measurement interval, and then dividing delta value by the length of
 the time interval.
 
 User is able to use power1_acc entry to measure the processor power
 consumption and power1_acc just needs to be read twice with an needed
 interval in-between.
 
 A simple example:
 
 $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc
 $ sleep 1s
 $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc
 
 The result is current average processor power consumption in 1
 seconds. The unit of the result is uWatt.
 
 Signed-off-by: Huang Rui ray.hu...@amd.com
 ---
   drivers/hwmon/fam15h_power.c | 62 
  
   1 file changed, 62 insertions(+)
 
 diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
 index d529e4b..3bab797 100644
 --- a/drivers/hwmon/fam15h_power.c
 +++ b/drivers/hwmon/fam15h_power.c
 @@ -60,6 +60,7 @@ struct fam15h_power_data {
u64 cu_acc_power[MAX_CUS];
/* performance timestamp counter */
u64 cpu_sw_pwr_ptsc[MAX_CUS];
 +  struct mutex acc_pwr_mutex;
   };
 
   static ssize_t show_power(struct device *dev,
 @@ -121,17 +122,74 @@ static DEVICE_ATTR(power1_crit, S_IRUGO, 
 show_power_crit, NULL);
   static struct attribute_group fam15h_power_group;
   __ATTRIBUTE_GROUPS(fam15h_power);
 
 +static ssize_t show_power_acc(struct device *dev,
 +struct device_attribute *attr, char *buf)
 +{
 +  int cpu, cu, cu_num, cores_per_cu;
 +  u64 curr_cu_acc_power[MAX_CUS],
 +  curr_ptsc[MAX_CUS], jdelta[MAX_CUS];
 +  u64 tdelta, avg_acc;
 +  struct fam15h_power_data *data = dev_get_drvdata(dev);
 +
 +  cores_per_cu = amd_get_cores_per_cu();
 +  cu_num = boot_cpu_data.x86_max_cores / cores_per_cu;
 +
 +  for (cpu = 0, avg_acc = 0; cpu  cu_num * cores_per_cu; cpu += 
 cores_per_cu) {
 +  cu = cpu / cores_per_cu;
 +  if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, curr_ptsc[cu])) {
 +  pr_err(Failed to read PTSC counter MSR on core%d\n,
 + cpu);
 +  return 0;
 +  }
 +
 +  if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR,
 + curr_cu_acc_power[cu])) {
 +  pr_err(Failed to read compute unit power accumulator 
 MSR on core%d\n,
 + cpu);
 +  return 0;
 +  }
 +
 +  if (curr_cu_acc_power[cu]  data-cu_acc_power[cu]) {
 +  jdelta[cu] = data-max_cu_acc_power + 
 curr_cu_acc_power[cu];
 +  jdelta[cu] -= data-cu_acc_power[cu];
 +  } else {
 +  jdelta[cu] = curr_cu_acc_power[cu] - 
 data-cu_acc_power[cu];
 +  }
 +  tdelta = curr_ptsc[cu] - data-cpu_sw_pwr_ptsc[cu];
 +  jdelta[cu] *= data-cpu_pwr_sample_ratio * 1000;
 +  do_div(jdelta[cu], tdelta);
 +
 +  mutex_lock(data-acc_pwr_mutex);
 +  data-cu_acc_power[cu] = curr_cu_acc_power[cu];
 +  data-cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu];
 +  mutex_unlock(data-acc_pwr_mutex);
 +
 +  /* the unit is microWatt */
 +  avg_acc += jdelta[cu];
 +  }
 +
 +  return sprintf(buf, %u\n, (unsigned int) avg_acc);
 +}
 +static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL);
 
 I am not really a friend of introducing a non-standard attribute.
 Does the energy attribute not work here ?
 
 
 You're right. Non-standard attribute might not be good. Could you
 please give me some hints if I use energy instead?
 
 1 Joule = 1 Watt-second.
 
 Something else, though - did you make sure that your code doesn't overflow ?
 Even though you calculate the average in an u64, you display it as unsigned.
 

Thanks to your reminder. It should not be overflow. The maximum power
consumption of processor (AMD CZ and future 15h) is about 15 Watts =
15,000,000 uWatts = 0xE4E1C0 uWatts, the size is 24  32  64 bits.

Actually, the unit of jdelta is not Joule. Because the tdelta is the
loops (cycles) that PTSC counter (the freqency is about 100 MHz)
counts not seconds.

So avg_acc is the average power consumption not the accumulated energy.

 100w * 10,000s = 1,000,000ws = 1,000,000,000,000 micro-watt-seconds, which is
 a bit large for an unsigned.
 
 Also, the values should not be reset after reading, but accumulate.
 
 Also, I think your code may be vulnerable to overflows on the CPU register 
 side.
 How long does it take before the CPU counters overflow ?
 

If I use energy, 15w * 10,000s = 150,000,000,000 microWatt-seconds.
Yes, it's large for an unsigned, but suitable for u64.

The accumulated power of 

Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm

2015-08-30 Thread Guenter Roeck

On 08/30/2015 09:16 PM, Huang Rui wrote:

On Fri, Aug 28, 2015 at 07:05:13AM -0700, Guenter Roeck wrote:

On 08/28/2015 03:45 AM, Huang Rui wrote:

On Thu, Aug 27, 2015 at 10:30:43AM -0700, Guenter Roeck wrote:

On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote:

This patch introduces an algorithm that computes the average power by
reading a delta value of “core power accumulator” register during
measurement interval, and then dividing delta value by the length of
the time interval.

User is able to use power1_acc entry to measure the processor power
consumption and power1_acc just needs to be read twice with an needed
interval in-between.

A simple example:

$ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc
$ sleep 1s
$ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc

The result is current average processor power consumption in 1
seconds. The unit of the result is uWatt.

Signed-off-by: Huang Rui ray.hu...@amd.com
---
  drivers/hwmon/fam15h_power.c | 62 
  1 file changed, 62 insertions(+)

diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
index d529e4b..3bab797 100644
--- a/drivers/hwmon/fam15h_power.c
+++ b/drivers/hwmon/fam15h_power.c
@@ -60,6 +60,7 @@ struct fam15h_power_data {
u64 cu_acc_power[MAX_CUS];
/* performance timestamp counter */
u64 cpu_sw_pwr_ptsc[MAX_CUS];
+   struct mutex acc_pwr_mutex;
  };

  static ssize_t show_power(struct device *dev,
@@ -121,17 +122,74 @@ static DEVICE_ATTR(power1_crit, S_IRUGO, show_power_crit, 
NULL);
  static struct attribute_group fam15h_power_group;
  __ATTRIBUTE_GROUPS(fam15h_power);

+static ssize_t show_power_acc(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+   int cpu, cu, cu_num, cores_per_cu;
+   u64 curr_cu_acc_power[MAX_CUS],
+   curr_ptsc[MAX_CUS], jdelta[MAX_CUS];
+   u64 tdelta, avg_acc;
+   struct fam15h_power_data *data = dev_get_drvdata(dev);
+
+   cores_per_cu = amd_get_cores_per_cu();
+   cu_num = boot_cpu_data.x86_max_cores / cores_per_cu;
+
+   for (cpu = 0, avg_acc = 0; cpu  cu_num * cores_per_cu; cpu += 
cores_per_cu) {
+   cu = cpu / cores_per_cu;
+   if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, curr_ptsc[cu])) {
+   pr_err(Failed to read PTSC counter MSR on core%d\n,
+  cpu);
+   return 0;
+   }
+
+   if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR,
+  curr_cu_acc_power[cu])) {
+   pr_err(Failed to read compute unit power accumulator MSR on 
core%d\n,
+  cpu);
+   return 0;
+   }
+
+   if (curr_cu_acc_power[cu]  data-cu_acc_power[cu]) {
+   jdelta[cu] = data-max_cu_acc_power + 
curr_cu_acc_power[cu];
+   jdelta[cu] -= data-cu_acc_power[cu];
+   } else {
+   jdelta[cu] = curr_cu_acc_power[cu] - 
data-cu_acc_power[cu];
+   }
+   tdelta = curr_ptsc[cu] - data-cpu_sw_pwr_ptsc[cu];
+   jdelta[cu] *= data-cpu_pwr_sample_ratio * 1000;
+   do_div(jdelta[cu], tdelta);
+
+   mutex_lock(data-acc_pwr_mutex);
+   data-cu_acc_power[cu] = curr_cu_acc_power[cu];
+   data-cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu];
+   mutex_unlock(data-acc_pwr_mutex);
+
+   /* the unit is microWatt */
+   avg_acc += jdelta[cu];
+   }
+
+   return sprintf(buf, %u\n, (unsigned int) avg_acc);
+}
+static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL);


I am not really a friend of introducing a non-standard attribute.
Does the energy attribute not work here ?



You're right. Non-standard attribute might not be good. Could you
please give me some hints if I use energy instead?


1 Joule = 1 Watt-second.

Something else, though - did you make sure that your code doesn't overflow ?
Even though you calculate the average in an u64, you display it as unsigned.



Thanks to your reminder. It should not be overflow. The maximum power
consumption of processor (AMD CZ and future 15h) is about 15 Watts =
15,000,000 uWatts = 0xE4E1C0 uWatts, the size is 24  32  64 bits.

Actually, the unit of jdelta is not Joule. Because the tdelta is the
loops (cycles) that PTSC counter (the freqency is about 100 MHz)
counts not seconds.

So avg_acc is the average power consumption not the accumulated energy.



Would power1_average then be better suitable for the attribute ?
There is also power1_average_interval which could be used to make
the interval configurable.

Thanks,
Guenter

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  

Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm

2015-08-28 Thread Guenter Roeck

On 08/28/2015 03:45 AM, Huang Rui wrote:

On Thu, Aug 27, 2015 at 10:30:43AM -0700, Guenter Roeck wrote:

On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote:

This patch introduces an algorithm that computes the average power by
reading a delta value of “core power accumulator” register during
measurement interval, and then dividing delta value by the length of
the time interval.

User is able to use power1_acc entry to measure the processor power
consumption and power1_acc just needs to be read twice with an needed
interval in-between.

A simple example:

$ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc
$ sleep 1s
$ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc

The result is current average processor power consumption in 1
seconds. The unit of the result is uWatt.

Signed-off-by: Huang Rui 
---
  drivers/hwmon/fam15h_power.c | 62 
  1 file changed, 62 insertions(+)

diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
index d529e4b..3bab797 100644
--- a/drivers/hwmon/fam15h_power.c
+++ b/drivers/hwmon/fam15h_power.c
@@ -60,6 +60,7 @@ struct fam15h_power_data {
u64 cu_acc_power[MAX_CUS];
/* performance timestamp counter */
u64 cpu_sw_pwr_ptsc[MAX_CUS];
+   struct mutex acc_pwr_mutex;
  };

  static ssize_t show_power(struct device *dev,
@@ -121,17 +122,74 @@ static DEVICE_ATTR(power1_crit, S_IRUGO, show_power_crit, 
NULL);
  static struct attribute_group fam15h_power_group;
  __ATTRIBUTE_GROUPS(fam15h_power);

+static ssize_t show_power_acc(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+   int cpu, cu, cu_num, cores_per_cu;
+   u64 curr_cu_acc_power[MAX_CUS],
+   curr_ptsc[MAX_CUS], jdelta[MAX_CUS];
+   u64 tdelta, avg_acc;
+   struct fam15h_power_data *data = dev_get_drvdata(dev);
+
+   cores_per_cu = amd_get_cores_per_cu();
+   cu_num = boot_cpu_data.x86_max_cores / cores_per_cu;
+
+   for (cpu = 0, avg_acc = 0; cpu < cu_num * cores_per_cu; cpu += 
cores_per_cu) {
+   cu = cpu / cores_per_cu;
+   if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, _ptsc[cu])) {
+   pr_err("Failed to read PTSC counter MSR on core%d\n",
+  cpu);
+   return 0;
+   }
+
+   if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR,
+  _cu_acc_power[cu])) {
+   pr_err("Failed to read compute unit power accumulator MSR on 
core%d\n",
+  cpu);
+   return 0;
+   }
+
+   if (curr_cu_acc_power[cu] < data->cu_acc_power[cu]) {
+   jdelta[cu] = data->max_cu_acc_power + 
curr_cu_acc_power[cu];
+   jdelta[cu] -= data->cu_acc_power[cu];
+   } else {
+   jdelta[cu] = curr_cu_acc_power[cu] - 
data->cu_acc_power[cu];
+   }
+   tdelta = curr_ptsc[cu] - data->cpu_sw_pwr_ptsc[cu];
+   jdelta[cu] *= data->cpu_pwr_sample_ratio * 1000;
+   do_div(jdelta[cu], tdelta);
+
+   mutex_lock(>acc_pwr_mutex);
+   data->cu_acc_power[cu] = curr_cu_acc_power[cu];
+   data->cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu];
+   mutex_unlock(>acc_pwr_mutex);
+
+   /* the unit is microWatt */
+   avg_acc += jdelta[cu];
+   }
+
+   return sprintf(buf, "%u\n", (unsigned int) avg_acc);
+}
+static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL);


I am not really a friend of introducing a non-standard attribute.
Does the energy attribute not work here ?



You're right. Non-standard attribute might not be good. Could you
please give me some hints if I use "energy" instead?


1 Joule = 1 Watt-second.

Something else, though - did you make sure that your code doesn't overflow ?
Even though you calculate the average in an u64, you display it as unsigned.

100w * 10,000s = 1,000,000ws = 1,000,000,000,000 micro-watt-seconds, which is
a bit large for an unsigned.

Also, the values should not be reset after reading, but accumulate.

Also, I think your code may be vulnerable to overflows on the CPU register side.
How long does it take before the CPU counters overflow ?

Thanks,
Guenter

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm

2015-08-28 Thread Huang Rui
On Thu, Aug 27, 2015 at 10:30:43AM -0700, Guenter Roeck wrote:
> On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote:
> > This patch introduces an algorithm that computes the average power by
> > reading a delta value of “core power accumulator” register during
> > measurement interval, and then dividing delta value by the length of
> > the time interval.
> > 
> > User is able to use power1_acc entry to measure the processor power
> > consumption and power1_acc just needs to be read twice with an needed
> > interval in-between.
> > 
> > A simple example:
> > 
> > $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc
> > $ sleep 1s
> > $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc
> > 
> > The result is current average processor power consumption in 1
> > seconds. The unit of the result is uWatt.
> > 
> > Signed-off-by: Huang Rui 
> > ---
> >  drivers/hwmon/fam15h_power.c | 62 
> > 
> >  1 file changed, 62 insertions(+)
> > 
> > diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
> > index d529e4b..3bab797 100644
> > --- a/drivers/hwmon/fam15h_power.c
> > +++ b/drivers/hwmon/fam15h_power.c
> > @@ -60,6 +60,7 @@ struct fam15h_power_data {
> > u64 cu_acc_power[MAX_CUS];
> > /* performance timestamp counter */
> > u64 cpu_sw_pwr_ptsc[MAX_CUS];
> > +   struct mutex acc_pwr_mutex;
> >  };
> >  
> >  static ssize_t show_power(struct device *dev,
> > @@ -121,17 +122,74 @@ static DEVICE_ATTR(power1_crit, S_IRUGO, 
> > show_power_crit, NULL);
> >  static struct attribute_group fam15h_power_group;
> >  __ATTRIBUTE_GROUPS(fam15h_power);
> >  
> > +static ssize_t show_power_acc(struct device *dev,
> > + struct device_attribute *attr, char *buf)
> > +{
> > +   int cpu, cu, cu_num, cores_per_cu;
> > +   u64 curr_cu_acc_power[MAX_CUS],
> > +   curr_ptsc[MAX_CUS], jdelta[MAX_CUS];
> > +   u64 tdelta, avg_acc;
> > +   struct fam15h_power_data *data = dev_get_drvdata(dev);
> > +
> > +   cores_per_cu = amd_get_cores_per_cu();
> > +   cu_num = boot_cpu_data.x86_max_cores / cores_per_cu;
> > +
> > +   for (cpu = 0, avg_acc = 0; cpu < cu_num * cores_per_cu; cpu += 
> > cores_per_cu) {
> > +   cu = cpu / cores_per_cu;
> > +   if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, _ptsc[cu])) {
> > +   pr_err("Failed to read PTSC counter MSR on core%d\n",
> > +  cpu);
> > +   return 0;
> > +   }
> > +
> > +   if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR,
> > +  _cu_acc_power[cu])) {
> > +   pr_err("Failed to read compute unit power accumulator 
> > MSR on core%d\n",
> > +  cpu);
> > +   return 0;
> > +   }
> > +
> > +   if (curr_cu_acc_power[cu] < data->cu_acc_power[cu]) {
> > +   jdelta[cu] = data->max_cu_acc_power + 
> > curr_cu_acc_power[cu];
> > +   jdelta[cu] -= data->cu_acc_power[cu];
> > +   } else {
> > +   jdelta[cu] = curr_cu_acc_power[cu] - 
> > data->cu_acc_power[cu];
> > +   }
> > +   tdelta = curr_ptsc[cu] - data->cpu_sw_pwr_ptsc[cu];
> > +   jdelta[cu] *= data->cpu_pwr_sample_ratio * 1000;
> > +   do_div(jdelta[cu], tdelta);
> > +
> > +   mutex_lock(>acc_pwr_mutex);
> > +   data->cu_acc_power[cu] = curr_cu_acc_power[cu];
> > +   data->cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu];
> > +   mutex_unlock(>acc_pwr_mutex);
> > +
> > +   /* the unit is microWatt */
> > +   avg_acc += jdelta[cu];
> > +   }
> > +
> > +   return sprintf(buf, "%u\n", (unsigned int) avg_acc);
> > +}
> > +static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL);
> 
> I am not really a friend of introducing a non-standard attribute.
> Does the energy attribute not work here ?
> 

You're right. Non-standard attribute might not be good. Could you
please give me some hints if I use "energy" instead?

Thanks,
Rui
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm

2015-08-28 Thread Huang Rui
On Thu, Aug 27, 2015 at 10:30:43AM -0700, Guenter Roeck wrote:
 On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote:
  This patch introduces an algorithm that computes the average power by
  reading a delta value of “core power accumulator” register during
  measurement interval, and then dividing delta value by the length of
  the time interval.
  
  User is able to use power1_acc entry to measure the processor power
  consumption and power1_acc just needs to be read twice with an needed
  interval in-between.
  
  A simple example:
  
  $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc
  $ sleep 1s
  $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc
  
  The result is current average processor power consumption in 1
  seconds. The unit of the result is uWatt.
  
  Signed-off-by: Huang Rui ray.hu...@amd.com
  ---
   drivers/hwmon/fam15h_power.c | 62 
  
   1 file changed, 62 insertions(+)
  
  diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
  index d529e4b..3bab797 100644
  --- a/drivers/hwmon/fam15h_power.c
  +++ b/drivers/hwmon/fam15h_power.c
  @@ -60,6 +60,7 @@ struct fam15h_power_data {
  u64 cu_acc_power[MAX_CUS];
  /* performance timestamp counter */
  u64 cpu_sw_pwr_ptsc[MAX_CUS];
  +   struct mutex acc_pwr_mutex;
   };
   
   static ssize_t show_power(struct device *dev,
  @@ -121,17 +122,74 @@ static DEVICE_ATTR(power1_crit, S_IRUGO, 
  show_power_crit, NULL);
   static struct attribute_group fam15h_power_group;
   __ATTRIBUTE_GROUPS(fam15h_power);
   
  +static ssize_t show_power_acc(struct device *dev,
  + struct device_attribute *attr, char *buf)
  +{
  +   int cpu, cu, cu_num, cores_per_cu;
  +   u64 curr_cu_acc_power[MAX_CUS],
  +   curr_ptsc[MAX_CUS], jdelta[MAX_CUS];
  +   u64 tdelta, avg_acc;
  +   struct fam15h_power_data *data = dev_get_drvdata(dev);
  +
  +   cores_per_cu = amd_get_cores_per_cu();
  +   cu_num = boot_cpu_data.x86_max_cores / cores_per_cu;
  +
  +   for (cpu = 0, avg_acc = 0; cpu  cu_num * cores_per_cu; cpu += 
  cores_per_cu) {
  +   cu = cpu / cores_per_cu;
  +   if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, curr_ptsc[cu])) {
  +   pr_err(Failed to read PTSC counter MSR on core%d\n,
  +  cpu);
  +   return 0;
  +   }
  +
  +   if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR,
  +  curr_cu_acc_power[cu])) {
  +   pr_err(Failed to read compute unit power accumulator 
  MSR on core%d\n,
  +  cpu);
  +   return 0;
  +   }
  +
  +   if (curr_cu_acc_power[cu]  data-cu_acc_power[cu]) {
  +   jdelta[cu] = data-max_cu_acc_power + 
  curr_cu_acc_power[cu];
  +   jdelta[cu] -= data-cu_acc_power[cu];
  +   } else {
  +   jdelta[cu] = curr_cu_acc_power[cu] - 
  data-cu_acc_power[cu];
  +   }
  +   tdelta = curr_ptsc[cu] - data-cpu_sw_pwr_ptsc[cu];
  +   jdelta[cu] *= data-cpu_pwr_sample_ratio * 1000;
  +   do_div(jdelta[cu], tdelta);
  +
  +   mutex_lock(data-acc_pwr_mutex);
  +   data-cu_acc_power[cu] = curr_cu_acc_power[cu];
  +   data-cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu];
  +   mutex_unlock(data-acc_pwr_mutex);
  +
  +   /* the unit is microWatt */
  +   avg_acc += jdelta[cu];
  +   }
  +
  +   return sprintf(buf, %u\n, (unsigned int) avg_acc);
  +}
  +static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL);
 
 I am not really a friend of introducing a non-standard attribute.
 Does the energy attribute not work here ?
 

You're right. Non-standard attribute might not be good. Could you
please give me some hints if I use energy instead?

Thanks,
Rui
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm

2015-08-28 Thread Guenter Roeck

On 08/28/2015 03:45 AM, Huang Rui wrote:

On Thu, Aug 27, 2015 at 10:30:43AM -0700, Guenter Roeck wrote:

On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote:

This patch introduces an algorithm that computes the average power by
reading a delta value of “core power accumulator” register during
measurement interval, and then dividing delta value by the length of
the time interval.

User is able to use power1_acc entry to measure the processor power
consumption and power1_acc just needs to be read twice with an needed
interval in-between.

A simple example:

$ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc
$ sleep 1s
$ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc

The result is current average processor power consumption in 1
seconds. The unit of the result is uWatt.

Signed-off-by: Huang Rui ray.hu...@amd.com
---
  drivers/hwmon/fam15h_power.c | 62 
  1 file changed, 62 insertions(+)

diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
index d529e4b..3bab797 100644
--- a/drivers/hwmon/fam15h_power.c
+++ b/drivers/hwmon/fam15h_power.c
@@ -60,6 +60,7 @@ struct fam15h_power_data {
u64 cu_acc_power[MAX_CUS];
/* performance timestamp counter */
u64 cpu_sw_pwr_ptsc[MAX_CUS];
+   struct mutex acc_pwr_mutex;
  };

  static ssize_t show_power(struct device *dev,
@@ -121,17 +122,74 @@ static DEVICE_ATTR(power1_crit, S_IRUGO, show_power_crit, 
NULL);
  static struct attribute_group fam15h_power_group;
  __ATTRIBUTE_GROUPS(fam15h_power);

+static ssize_t show_power_acc(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+   int cpu, cu, cu_num, cores_per_cu;
+   u64 curr_cu_acc_power[MAX_CUS],
+   curr_ptsc[MAX_CUS], jdelta[MAX_CUS];
+   u64 tdelta, avg_acc;
+   struct fam15h_power_data *data = dev_get_drvdata(dev);
+
+   cores_per_cu = amd_get_cores_per_cu();
+   cu_num = boot_cpu_data.x86_max_cores / cores_per_cu;
+
+   for (cpu = 0, avg_acc = 0; cpu  cu_num * cores_per_cu; cpu += 
cores_per_cu) {
+   cu = cpu / cores_per_cu;
+   if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, curr_ptsc[cu])) {
+   pr_err(Failed to read PTSC counter MSR on core%d\n,
+  cpu);
+   return 0;
+   }
+
+   if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR,
+  curr_cu_acc_power[cu])) {
+   pr_err(Failed to read compute unit power accumulator MSR on 
core%d\n,
+  cpu);
+   return 0;
+   }
+
+   if (curr_cu_acc_power[cu]  data-cu_acc_power[cu]) {
+   jdelta[cu] = data-max_cu_acc_power + 
curr_cu_acc_power[cu];
+   jdelta[cu] -= data-cu_acc_power[cu];
+   } else {
+   jdelta[cu] = curr_cu_acc_power[cu] - 
data-cu_acc_power[cu];
+   }
+   tdelta = curr_ptsc[cu] - data-cpu_sw_pwr_ptsc[cu];
+   jdelta[cu] *= data-cpu_pwr_sample_ratio * 1000;
+   do_div(jdelta[cu], tdelta);
+
+   mutex_lock(data-acc_pwr_mutex);
+   data-cu_acc_power[cu] = curr_cu_acc_power[cu];
+   data-cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu];
+   mutex_unlock(data-acc_pwr_mutex);
+
+   /* the unit is microWatt */
+   avg_acc += jdelta[cu];
+   }
+
+   return sprintf(buf, %u\n, (unsigned int) avg_acc);
+}
+static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL);


I am not really a friend of introducing a non-standard attribute.
Does the energy attribute not work here ?



You're right. Non-standard attribute might not be good. Could you
please give me some hints if I use energy instead?


1 Joule = 1 Watt-second.

Something else, though - did you make sure that your code doesn't overflow ?
Even though you calculate the average in an u64, you display it as unsigned.

100w * 10,000s = 1,000,000ws = 1,000,000,000,000 micro-watt-seconds, which is
a bit large for an unsigned.

Also, the values should not be reset after reading, but accumulate.

Also, I think your code may be vulnerable to overflows on the CPU register side.
How long does it take before the CPU counters overflow ?

Thanks,
Guenter

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm

2015-08-27 Thread Guenter Roeck
On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote:
> This patch introduces an algorithm that computes the average power by
> reading a delta value of “core power accumulator” register during
> measurement interval, and then dividing delta value by the length of
> the time interval.
> 
> User is able to use power1_acc entry to measure the processor power
> consumption and power1_acc just needs to be read twice with an needed
> interval in-between.
> 
> A simple example:
> 
> $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc
> $ sleep 1s
> $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc
> 
> The result is current average processor power consumption in 1
> seconds. The unit of the result is uWatt.
> 
> Signed-off-by: Huang Rui 
> ---
>  drivers/hwmon/fam15h_power.c | 62 
> 
>  1 file changed, 62 insertions(+)
> 
> diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
> index d529e4b..3bab797 100644
> --- a/drivers/hwmon/fam15h_power.c
> +++ b/drivers/hwmon/fam15h_power.c
> @@ -60,6 +60,7 @@ struct fam15h_power_data {
>   u64 cu_acc_power[MAX_CUS];
>   /* performance timestamp counter */
>   u64 cpu_sw_pwr_ptsc[MAX_CUS];
> + struct mutex acc_pwr_mutex;
>  };
>  
>  static ssize_t show_power(struct device *dev,
> @@ -121,17 +122,74 @@ static DEVICE_ATTR(power1_crit, S_IRUGO, 
> show_power_crit, NULL);
>  static struct attribute_group fam15h_power_group;
>  __ATTRIBUTE_GROUPS(fam15h_power);
>  
> +static ssize_t show_power_acc(struct device *dev,
> +   struct device_attribute *attr, char *buf)
> +{
> + int cpu, cu, cu_num, cores_per_cu;
> + u64 curr_cu_acc_power[MAX_CUS],
> + curr_ptsc[MAX_CUS], jdelta[MAX_CUS];
> + u64 tdelta, avg_acc;
> + struct fam15h_power_data *data = dev_get_drvdata(dev);
> +
> + cores_per_cu = amd_get_cores_per_cu();
> + cu_num = boot_cpu_data.x86_max_cores / cores_per_cu;
> +
> + for (cpu = 0, avg_acc = 0; cpu < cu_num * cores_per_cu; cpu += 
> cores_per_cu) {
> + cu = cpu / cores_per_cu;
> + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, _ptsc[cu])) {
> + pr_err("Failed to read PTSC counter MSR on core%d\n",
> +cpu);
> + return 0;
> + }
> +
> + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR,
> +_cu_acc_power[cu])) {
> + pr_err("Failed to read compute unit power accumulator 
> MSR on core%d\n",
> +cpu);
> + return 0;
> + }
> +
> + if (curr_cu_acc_power[cu] < data->cu_acc_power[cu]) {
> + jdelta[cu] = data->max_cu_acc_power + 
> curr_cu_acc_power[cu];
> + jdelta[cu] -= data->cu_acc_power[cu];
> + } else {
> + jdelta[cu] = curr_cu_acc_power[cu] - 
> data->cu_acc_power[cu];
> + }
> + tdelta = curr_ptsc[cu] - data->cpu_sw_pwr_ptsc[cu];
> + jdelta[cu] *= data->cpu_pwr_sample_ratio * 1000;
> + do_div(jdelta[cu], tdelta);
> +
> + mutex_lock(>acc_pwr_mutex);
> + data->cu_acc_power[cu] = curr_cu_acc_power[cu];
> + data->cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu];
> + mutex_unlock(>acc_pwr_mutex);
> +
> + /* the unit is microWatt */
> + avg_acc += jdelta[cu];
> + }
> +
> + return sprintf(buf, "%u\n", (unsigned int) avg_acc);
> +}
> +static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL);

I am not really a friend of introducing a non-standard attribute.
Does the energy attribute not work here ?

Thanks,
Guenter
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm

2015-08-27 Thread Huang Rui
This patch introduces an algorithm that computes the average power by
reading a delta value of “core power accumulator” register during
measurement interval, and then dividing delta value by the length of
the time interval.

User is able to use power1_acc entry to measure the processor power
consumption and power1_acc just needs to be read twice with an needed
interval in-between.

A simple example:

$ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc
$ sleep 1s
$ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc

The result is current average processor power consumption in 1
seconds. The unit of the result is uWatt.

Signed-off-by: Huang Rui 
---
 drivers/hwmon/fam15h_power.c | 62 
 1 file changed, 62 insertions(+)

diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
index d529e4b..3bab797 100644
--- a/drivers/hwmon/fam15h_power.c
+++ b/drivers/hwmon/fam15h_power.c
@@ -60,6 +60,7 @@ struct fam15h_power_data {
u64 cu_acc_power[MAX_CUS];
/* performance timestamp counter */
u64 cpu_sw_pwr_ptsc[MAX_CUS];
+   struct mutex acc_pwr_mutex;
 };
 
 static ssize_t show_power(struct device *dev,
@@ -121,17 +122,74 @@ static DEVICE_ATTR(power1_crit, S_IRUGO, show_power_crit, 
NULL);
 static struct attribute_group fam15h_power_group;
 __ATTRIBUTE_GROUPS(fam15h_power);
 
+static ssize_t show_power_acc(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+   int cpu, cu, cu_num, cores_per_cu;
+   u64 curr_cu_acc_power[MAX_CUS],
+   curr_ptsc[MAX_CUS], jdelta[MAX_CUS];
+   u64 tdelta, avg_acc;
+   struct fam15h_power_data *data = dev_get_drvdata(dev);
+
+   cores_per_cu = amd_get_cores_per_cu();
+   cu_num = boot_cpu_data.x86_max_cores / cores_per_cu;
+
+   for (cpu = 0, avg_acc = 0; cpu < cu_num * cores_per_cu; cpu += 
cores_per_cu) {
+   cu = cpu / cores_per_cu;
+   if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, _ptsc[cu])) {
+   pr_err("Failed to read PTSC counter MSR on core%d\n",
+  cpu);
+   return 0;
+   }
+
+   if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR,
+  _cu_acc_power[cu])) {
+   pr_err("Failed to read compute unit power accumulator 
MSR on core%d\n",
+  cpu);
+   return 0;
+   }
+
+   if (curr_cu_acc_power[cu] < data->cu_acc_power[cu]) {
+   jdelta[cu] = data->max_cu_acc_power + 
curr_cu_acc_power[cu];
+   jdelta[cu] -= data->cu_acc_power[cu];
+   } else {
+   jdelta[cu] = curr_cu_acc_power[cu] - 
data->cu_acc_power[cu];
+   }
+   tdelta = curr_ptsc[cu] - data->cpu_sw_pwr_ptsc[cu];
+   jdelta[cu] *= data->cpu_pwr_sample_ratio * 1000;
+   do_div(jdelta[cu], tdelta);
+
+   mutex_lock(>acc_pwr_mutex);
+   data->cu_acc_power[cu] = curr_cu_acc_power[cu];
+   data->cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu];
+   mutex_unlock(>acc_pwr_mutex);
+
+   /* the unit is microWatt */
+   avg_acc += jdelta[cu];
+   }
+
+   return sprintf(buf, "%u\n", (unsigned int) avg_acc);
+}
+static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL);
+
 static int fam15h_power_init_attrs(struct pci_dev *pdev)
 {
int n = FAM15H_MIN_POWER_GROUPS;
struct attribute **fam15h_power_attrs;
struct cpuinfo_x86 *c = _cpu_data;
+   u32 cpuid;
 
if (c->x86 == 0x15 &&
((c->x86_model <= 0xf) ||
 (c->x86_model >= 0x60 && c->x86_model <= 0x6f)))
n += 1;
 
+   cpuid = cpuid_edx(0x8007);
+
+   /* check if processor supports accumulated power */
+   if (cpuid & BIT(12))
+   n += 1;
+
fam15h_power_attrs = devm_kcalloc(>dev, n,
  sizeof(*fam15h_power_attrs),
  GFP_KERNEL);
@@ -148,6 +206,9 @@ static int fam15h_power_init_attrs(struct pci_dev *pdev)
 (c->x86_model >= 0x60 && c->x86_model <= 0x6f)))
fam15h_power_attrs[n++] = _attr_power1_input.attr;
 
+   if (cpuid & BIT(12))
+   fam15h_power_attrs[n++] = _attr_power1_acc.attr;
+
fam15h_power_group.attrs = fam15h_power_attrs;
 
return 0;
@@ -311,6 +372,7 @@ static int fam15h_power_probe(struct pci_dev *pdev,
if (ret)
return ret;
 
+   mutex_init(>acc_pwr_mutex);
data->pdev = pdev;
 
hwmon_dev = devm_hwmon_device_register_with_groups(dev, "fam15h_power",
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to 

Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm

2015-08-27 Thread Guenter Roeck
On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote:
 This patch introduces an algorithm that computes the average power by
 reading a delta value of “core power accumulator” register during
 measurement interval, and then dividing delta value by the length of
 the time interval.
 
 User is able to use power1_acc entry to measure the processor power
 consumption and power1_acc just needs to be read twice with an needed
 interval in-between.
 
 A simple example:
 
 $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc
 $ sleep 1s
 $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc
 
 The result is current average processor power consumption in 1
 seconds. The unit of the result is uWatt.
 
 Signed-off-by: Huang Rui ray.hu...@amd.com
 ---
  drivers/hwmon/fam15h_power.c | 62 
 
  1 file changed, 62 insertions(+)
 
 diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
 index d529e4b..3bab797 100644
 --- a/drivers/hwmon/fam15h_power.c
 +++ b/drivers/hwmon/fam15h_power.c
 @@ -60,6 +60,7 @@ struct fam15h_power_data {
   u64 cu_acc_power[MAX_CUS];
   /* performance timestamp counter */
   u64 cpu_sw_pwr_ptsc[MAX_CUS];
 + struct mutex acc_pwr_mutex;
  };
  
  static ssize_t show_power(struct device *dev,
 @@ -121,17 +122,74 @@ static DEVICE_ATTR(power1_crit, S_IRUGO, 
 show_power_crit, NULL);
  static struct attribute_group fam15h_power_group;
  __ATTRIBUTE_GROUPS(fam15h_power);
  
 +static ssize_t show_power_acc(struct device *dev,
 +   struct device_attribute *attr, char *buf)
 +{
 + int cpu, cu, cu_num, cores_per_cu;
 + u64 curr_cu_acc_power[MAX_CUS],
 + curr_ptsc[MAX_CUS], jdelta[MAX_CUS];
 + u64 tdelta, avg_acc;
 + struct fam15h_power_data *data = dev_get_drvdata(dev);
 +
 + cores_per_cu = amd_get_cores_per_cu();
 + cu_num = boot_cpu_data.x86_max_cores / cores_per_cu;
 +
 + for (cpu = 0, avg_acc = 0; cpu  cu_num * cores_per_cu; cpu += 
 cores_per_cu) {
 + cu = cpu / cores_per_cu;
 + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, curr_ptsc[cu])) {
 + pr_err(Failed to read PTSC counter MSR on core%d\n,
 +cpu);
 + return 0;
 + }
 +
 + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR,
 +curr_cu_acc_power[cu])) {
 + pr_err(Failed to read compute unit power accumulator 
 MSR on core%d\n,
 +cpu);
 + return 0;
 + }
 +
 + if (curr_cu_acc_power[cu]  data-cu_acc_power[cu]) {
 + jdelta[cu] = data-max_cu_acc_power + 
 curr_cu_acc_power[cu];
 + jdelta[cu] -= data-cu_acc_power[cu];
 + } else {
 + jdelta[cu] = curr_cu_acc_power[cu] - 
 data-cu_acc_power[cu];
 + }
 + tdelta = curr_ptsc[cu] - data-cpu_sw_pwr_ptsc[cu];
 + jdelta[cu] *= data-cpu_pwr_sample_ratio * 1000;
 + do_div(jdelta[cu], tdelta);
 +
 + mutex_lock(data-acc_pwr_mutex);
 + data-cu_acc_power[cu] = curr_cu_acc_power[cu];
 + data-cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu];
 + mutex_unlock(data-acc_pwr_mutex);
 +
 + /* the unit is microWatt */
 + avg_acc += jdelta[cu];
 + }
 +
 + return sprintf(buf, %u\n, (unsigned int) avg_acc);
 +}
 +static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL);

I am not really a friend of introducing a non-standard attribute.
Does the energy attribute not work here ?

Thanks,
Guenter
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm

2015-08-27 Thread Huang Rui
This patch introduces an algorithm that computes the average power by
reading a delta value of “core power accumulator” register during
measurement interval, and then dividing delta value by the length of
the time interval.

User is able to use power1_acc entry to measure the processor power
consumption and power1_acc just needs to be read twice with an needed
interval in-between.

A simple example:

$ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc
$ sleep 1s
$ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc

The result is current average processor power consumption in 1
seconds. The unit of the result is uWatt.

Signed-off-by: Huang Rui ray.hu...@amd.com
---
 drivers/hwmon/fam15h_power.c | 62 
 1 file changed, 62 insertions(+)

diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
index d529e4b..3bab797 100644
--- a/drivers/hwmon/fam15h_power.c
+++ b/drivers/hwmon/fam15h_power.c
@@ -60,6 +60,7 @@ struct fam15h_power_data {
u64 cu_acc_power[MAX_CUS];
/* performance timestamp counter */
u64 cpu_sw_pwr_ptsc[MAX_CUS];
+   struct mutex acc_pwr_mutex;
 };
 
 static ssize_t show_power(struct device *dev,
@@ -121,17 +122,74 @@ static DEVICE_ATTR(power1_crit, S_IRUGO, show_power_crit, 
NULL);
 static struct attribute_group fam15h_power_group;
 __ATTRIBUTE_GROUPS(fam15h_power);
 
+static ssize_t show_power_acc(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+   int cpu, cu, cu_num, cores_per_cu;
+   u64 curr_cu_acc_power[MAX_CUS],
+   curr_ptsc[MAX_CUS], jdelta[MAX_CUS];
+   u64 tdelta, avg_acc;
+   struct fam15h_power_data *data = dev_get_drvdata(dev);
+
+   cores_per_cu = amd_get_cores_per_cu();
+   cu_num = boot_cpu_data.x86_max_cores / cores_per_cu;
+
+   for (cpu = 0, avg_acc = 0; cpu  cu_num * cores_per_cu; cpu += 
cores_per_cu) {
+   cu = cpu / cores_per_cu;
+   if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, curr_ptsc[cu])) {
+   pr_err(Failed to read PTSC counter MSR on core%d\n,
+  cpu);
+   return 0;
+   }
+
+   if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR,
+  curr_cu_acc_power[cu])) {
+   pr_err(Failed to read compute unit power accumulator 
MSR on core%d\n,
+  cpu);
+   return 0;
+   }
+
+   if (curr_cu_acc_power[cu]  data-cu_acc_power[cu]) {
+   jdelta[cu] = data-max_cu_acc_power + 
curr_cu_acc_power[cu];
+   jdelta[cu] -= data-cu_acc_power[cu];
+   } else {
+   jdelta[cu] = curr_cu_acc_power[cu] - 
data-cu_acc_power[cu];
+   }
+   tdelta = curr_ptsc[cu] - data-cpu_sw_pwr_ptsc[cu];
+   jdelta[cu] *= data-cpu_pwr_sample_ratio * 1000;
+   do_div(jdelta[cu], tdelta);
+
+   mutex_lock(data-acc_pwr_mutex);
+   data-cu_acc_power[cu] = curr_cu_acc_power[cu];
+   data-cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu];
+   mutex_unlock(data-acc_pwr_mutex);
+
+   /* the unit is microWatt */
+   avg_acc += jdelta[cu];
+   }
+
+   return sprintf(buf, %u\n, (unsigned int) avg_acc);
+}
+static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL);
+
 static int fam15h_power_init_attrs(struct pci_dev *pdev)
 {
int n = FAM15H_MIN_POWER_GROUPS;
struct attribute **fam15h_power_attrs;
struct cpuinfo_x86 *c = boot_cpu_data;
+   u32 cpuid;
 
if (c-x86 == 0x15 
((c-x86_model = 0xf) ||
 (c-x86_model = 0x60  c-x86_model = 0x6f)))
n += 1;
 
+   cpuid = cpuid_edx(0x8007);
+
+   /* check if processor supports accumulated power */
+   if (cpuid  BIT(12))
+   n += 1;
+
fam15h_power_attrs = devm_kcalloc(pdev-dev, n,
  sizeof(*fam15h_power_attrs),
  GFP_KERNEL);
@@ -148,6 +206,9 @@ static int fam15h_power_init_attrs(struct pci_dev *pdev)
 (c-x86_model = 0x60  c-x86_model = 0x6f)))
fam15h_power_attrs[n++] = dev_attr_power1_input.attr;
 
+   if (cpuid  BIT(12))
+   fam15h_power_attrs[n++] = dev_attr_power1_acc.attr;
+
fam15h_power_group.attrs = fam15h_power_attrs;
 
return 0;
@@ -311,6 +372,7 @@ static int fam15h_power_probe(struct pci_dev *pdev,
if (ret)
return ret;
 
+   mutex_init(data-acc_pwr_mutex);
data-pdev = pdev;
 
hwmon_dev = devm_hwmon_device_register_with_groups(dev, fam15h_power,
-- 
1.9.1

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to