Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm
On Mon, Aug 31, 2015 at 03:25:35PM +0200, Peter Zijlstra wrote: > On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote: > > +static ssize_t show_power_acc(struct device *dev, > > + struct device_attribute *attr, char *buf) > > +{ > > + int cpu, cu, cu_num, cores_per_cu; > > + u64 curr_cu_acc_power[MAX_CUS], > > + curr_ptsc[MAX_CUS], jdelta[MAX_CUS]; > > + u64 tdelta, avg_acc; > > + struct fam15h_power_data *data = dev_get_drvdata(dev); > > + > > + cores_per_cu = amd_get_cores_per_cu(); > > + cu_num = boot_cpu_data.x86_max_cores / cores_per_cu; > > + > > + for (cpu = 0, avg_acc = 0; cpu < cu_num * cores_per_cu; cpu += > > cores_per_cu) { > > + cu = cpu / cores_per_cu; > > + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, _ptsc[cu])) { > > + pr_err("Failed to read PTSC counter MSR on core%d\n", > > + cpu); > > + return 0; > > + } > > + > > + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR, > > + _cu_acc_power[cu])) { > > + pr_err("Failed to read compute unit power accumulator > > MSR on core%d\n", > > + cpu); > > + return 0; > > + } > > + > > + if (curr_cu_acc_power[cu] < data->cu_acc_power[cu]) { > > + jdelta[cu] = data->max_cu_acc_power + > > curr_cu_acc_power[cu]; > > + jdelta[cu] -= data->cu_acc_power[cu]; > > + } else { > > + jdelta[cu] = curr_cu_acc_power[cu] - > > data->cu_acc_power[cu]; > > + } > > + tdelta = curr_ptsc[cu] - data->cpu_sw_pwr_ptsc[cu]; > > + jdelta[cu] *= data->cpu_pwr_sample_ratio * 1000; > > + do_div(jdelta[cu], tdelta); > > + > > + mutex_lock(>acc_pwr_mutex); > > + data->cu_acc_power[cu] = curr_cu_acc_power[cu]; > > + data->cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu]; > > + mutex_unlock(>acc_pwr_mutex); > > + > > + /* the unit is microWatt */ > > + avg_acc += jdelta[cu]; > > + } > > + > > + return sprintf(buf, "%u\n", (unsigned int) avg_acc); > > +} > > +static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL); > > This is a world readable file that sprays IPIs across the entire system, > how is that a good idea? FWIW this driver is also hotplug challenged. Not to mention it relies on cpu number layout in unhealthy ways. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm
On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote: > +static ssize_t show_power_acc(struct device *dev, > + struct device_attribute *attr, char *buf) > +{ > + int cpu, cu, cu_num, cores_per_cu; > + u64 curr_cu_acc_power[MAX_CUS], > + curr_ptsc[MAX_CUS], jdelta[MAX_CUS]; > + u64 tdelta, avg_acc; > + struct fam15h_power_data *data = dev_get_drvdata(dev); > + > + cores_per_cu = amd_get_cores_per_cu(); > + cu_num = boot_cpu_data.x86_max_cores / cores_per_cu; > + > + for (cpu = 0, avg_acc = 0; cpu < cu_num * cores_per_cu; cpu += > cores_per_cu) { > + cu = cpu / cores_per_cu; > + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, _ptsc[cu])) { > + pr_err("Failed to read PTSC counter MSR on core%d\n", > +cpu); > + return 0; > + } > + > + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR, > +_cu_acc_power[cu])) { > + pr_err("Failed to read compute unit power accumulator > MSR on core%d\n", > +cpu); > + return 0; > + } > + > + if (curr_cu_acc_power[cu] < data->cu_acc_power[cu]) { > + jdelta[cu] = data->max_cu_acc_power + > curr_cu_acc_power[cu]; > + jdelta[cu] -= data->cu_acc_power[cu]; > + } else { > + jdelta[cu] = curr_cu_acc_power[cu] - > data->cu_acc_power[cu]; > + } > + tdelta = curr_ptsc[cu] - data->cpu_sw_pwr_ptsc[cu]; > + jdelta[cu] *= data->cpu_pwr_sample_ratio * 1000; > + do_div(jdelta[cu], tdelta); > + > + mutex_lock(>acc_pwr_mutex); > + data->cu_acc_power[cu] = curr_cu_acc_power[cu]; > + data->cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu]; > + mutex_unlock(>acc_pwr_mutex); > + > + /* the unit is microWatt */ > + avg_acc += jdelta[cu]; > + } > + > + return sprintf(buf, "%u\n", (unsigned int) avg_acc); > +} > +static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL); This is a world readable file that sprays IPIs across the entire system, how is that a good idea? -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm
On Sun, Aug 30, 2015 at 09:30:28PM -0700, Guenter Roeck wrote: > On 08/30/2015 09:16 PM, Huang Rui wrote: > >On Fri, Aug 28, 2015 at 07:05:13AM -0700, Guenter Roeck wrote: > >>On 08/28/2015 03:45 AM, Huang Rui wrote: > >>>On Thu, Aug 27, 2015 at 10:30:43AM -0700, Guenter Roeck wrote: > On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote: > >This patch introduces an algorithm that computes the average power by > >reading a delta value of “core power accumulator” register during > >measurement interval, and then dividing delta value by the length of > >the time interval. > > > >User is able to use power1_acc entry to measure the processor power > >consumption and power1_acc just needs to be read twice with an needed > >interval in-between. > > > >A simple example: > > > >$ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc > >$ sleep 1s > >$ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc > > > >The result is current average processor power consumption in 1 > >seconds. The unit of the result is uWatt. > > > >Signed-off-by: Huang Rui > >--- > > drivers/hwmon/fam15h_power.c | 62 > > > > 1 file changed, 62 insertions(+) > > > >diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c > >index d529e4b..3bab797 100644 > >--- a/drivers/hwmon/fam15h_power.c > >+++ b/drivers/hwmon/fam15h_power.c > >@@ -60,6 +60,7 @@ struct fam15h_power_data { > > u64 cu_acc_power[MAX_CUS]; > > /* performance timestamp counter */ > > u64 cpu_sw_pwr_ptsc[MAX_CUS]; > >+struct mutex acc_pwr_mutex; > > }; > > > > static ssize_t show_power(struct device *dev, > >@@ -121,17 +122,74 @@ static DEVICE_ATTR(power1_crit, S_IRUGO, > >show_power_crit, NULL); > > static struct attribute_group fam15h_power_group; > > __ATTRIBUTE_GROUPS(fam15h_power); > > > >+static ssize_t show_power_acc(struct device *dev, > >+ struct device_attribute *attr, char *buf) > >+{ > >+int cpu, cu, cu_num, cores_per_cu; > >+u64 curr_cu_acc_power[MAX_CUS], > >+curr_ptsc[MAX_CUS], jdelta[MAX_CUS]; > >+u64 tdelta, avg_acc; > >+struct fam15h_power_data *data = dev_get_drvdata(dev); > >+ > >+cores_per_cu = amd_get_cores_per_cu(); > >+cu_num = boot_cpu_data.x86_max_cores / cores_per_cu; > >+ > >+for (cpu = 0, avg_acc = 0; cpu < cu_num * cores_per_cu; cpu += > >cores_per_cu) { > >+cu = cpu / cores_per_cu; > >+if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, > >_ptsc[cu])) { > >+pr_err("Failed to read PTSC counter MSR on > >core%d\n", > >+ cpu); > >+return 0; > >+} > >+ > >+if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR, > >+ _cu_acc_power[cu])) { > >+pr_err("Failed to read compute unit power > >accumulator MSR on core%d\n", > >+ cpu); > >+return 0; > >+} > >+ > >+if (curr_cu_acc_power[cu] < data->cu_acc_power[cu]) { > >+jdelta[cu] = data->max_cu_acc_power + > >curr_cu_acc_power[cu]; > >+jdelta[cu] -= data->cu_acc_power[cu]; > >+} else { > >+jdelta[cu] = curr_cu_acc_power[cu] - > >data->cu_acc_power[cu]; > >+} > >+tdelta = curr_ptsc[cu] - data->cpu_sw_pwr_ptsc[cu]; > >+jdelta[cu] *= data->cpu_pwr_sample_ratio * 1000; > >+do_div(jdelta[cu], tdelta); > >+ > >+mutex_lock(>acc_pwr_mutex); > >+data->cu_acc_power[cu] = curr_cu_acc_power[cu]; > >+data->cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu]; > >+mutex_unlock(>acc_pwr_mutex); > >+ > >+/* the unit is microWatt */ > >+avg_acc += jdelta[cu]; > >+} > >+ > >+return sprintf(buf, "%u\n", (unsigned int) avg_acc); > >+} > >+static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL); > > I am not really a friend of introducing a non-standard attribute. > Does the energy attribute not work here ? > > >>> > >>>You're right. Non-standard attribute might not be good. Could you > >>>please give me some hints if I use "energy" instead? > >>> > >>1 Joule = 1 Watt-second. > >> > >>Something else, though - did you make sure that your code doesn't overflow ? > >>Even
Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm
On Mon, Aug 31, 2015 at 03:25:35PM +0200, Peter Zijlstra wrote: > On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote: > > +static ssize_t show_power_acc(struct device *dev, > > + struct device_attribute *attr, char *buf) > > +{ > > + int cpu, cu, cu_num, cores_per_cu; > > + u64 curr_cu_acc_power[MAX_CUS], > > + curr_ptsc[MAX_CUS], jdelta[MAX_CUS]; > > + u64 tdelta, avg_acc; > > + struct fam15h_power_data *data = dev_get_drvdata(dev); > > + > > + cores_per_cu = amd_get_cores_per_cu(); > > + cu_num = boot_cpu_data.x86_max_cores / cores_per_cu; > > + > > + for (cpu = 0, avg_acc = 0; cpu < cu_num * cores_per_cu; cpu += > > cores_per_cu) { > > + cu = cpu / cores_per_cu; > > + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, _ptsc[cu])) { > > + pr_err("Failed to read PTSC counter MSR on core%d\n", > > + cpu); > > + return 0; > > + } > > + > > + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR, > > + _cu_acc_power[cu])) { > > + pr_err("Failed to read compute unit power accumulator > > MSR on core%d\n", > > + cpu); > > + return 0; > > + } > > + > > + if (curr_cu_acc_power[cu] < data->cu_acc_power[cu]) { > > + jdelta[cu] = data->max_cu_acc_power + > > curr_cu_acc_power[cu]; > > + jdelta[cu] -= data->cu_acc_power[cu]; > > + } else { > > + jdelta[cu] = curr_cu_acc_power[cu] - > > data->cu_acc_power[cu]; > > + } > > + tdelta = curr_ptsc[cu] - data->cpu_sw_pwr_ptsc[cu]; > > + jdelta[cu] *= data->cpu_pwr_sample_ratio * 1000; > > + do_div(jdelta[cu], tdelta); > > + > > + mutex_lock(>acc_pwr_mutex); > > + data->cu_acc_power[cu] = curr_cu_acc_power[cu]; > > + data->cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu]; > > + mutex_unlock(>acc_pwr_mutex); > > + > > + /* the unit is microWatt */ > > + avg_acc += jdelta[cu]; > > + } > > + > > + return sprintf(buf, "%u\n", (unsigned int) avg_acc); > > +} > > +static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL); > > This is a world readable file that sprays IPIs across the entire system, > how is that a good idea? FWIW this driver is also hotplug challenged. Not to mention it relies on cpu number layout in unhealthy ways. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm
On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote: > +static ssize_t show_power_acc(struct device *dev, > + struct device_attribute *attr, char *buf) > +{ > + int cpu, cu, cu_num, cores_per_cu; > + u64 curr_cu_acc_power[MAX_CUS], > + curr_ptsc[MAX_CUS], jdelta[MAX_CUS]; > + u64 tdelta, avg_acc; > + struct fam15h_power_data *data = dev_get_drvdata(dev); > + > + cores_per_cu = amd_get_cores_per_cu(); > + cu_num = boot_cpu_data.x86_max_cores / cores_per_cu; > + > + for (cpu = 0, avg_acc = 0; cpu < cu_num * cores_per_cu; cpu += > cores_per_cu) { > + cu = cpu / cores_per_cu; > + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, _ptsc[cu])) { > + pr_err("Failed to read PTSC counter MSR on core%d\n", > +cpu); > + return 0; > + } > + > + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR, > +_cu_acc_power[cu])) { > + pr_err("Failed to read compute unit power accumulator > MSR on core%d\n", > +cpu); > + return 0; > + } > + > + if (curr_cu_acc_power[cu] < data->cu_acc_power[cu]) { > + jdelta[cu] = data->max_cu_acc_power + > curr_cu_acc_power[cu]; > + jdelta[cu] -= data->cu_acc_power[cu]; > + } else { > + jdelta[cu] = curr_cu_acc_power[cu] - > data->cu_acc_power[cu]; > + } > + tdelta = curr_ptsc[cu] - data->cpu_sw_pwr_ptsc[cu]; > + jdelta[cu] *= data->cpu_pwr_sample_ratio * 1000; > + do_div(jdelta[cu], tdelta); > + > + mutex_lock(>acc_pwr_mutex); > + data->cu_acc_power[cu] = curr_cu_acc_power[cu]; > + data->cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu]; > + mutex_unlock(>acc_pwr_mutex); > + > + /* the unit is microWatt */ > + avg_acc += jdelta[cu]; > + } > + > + return sprintf(buf, "%u\n", (unsigned int) avg_acc); > +} > +static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL); This is a world readable file that sprays IPIs across the entire system, how is that a good idea? -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm
On Sun, Aug 30, 2015 at 09:30:28PM -0700, Guenter Roeck wrote: > On 08/30/2015 09:16 PM, Huang Rui wrote: > >On Fri, Aug 28, 2015 at 07:05:13AM -0700, Guenter Roeck wrote: > >>On 08/28/2015 03:45 AM, Huang Rui wrote: > >>>On Thu, Aug 27, 2015 at 10:30:43AM -0700, Guenter Roeck wrote: > On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote: > >This patch introduces an algorithm that computes the average power by > >reading a delta value of “core power accumulator” register during > >measurement interval, and then dividing delta value by the length of > >the time interval. > > > >User is able to use power1_acc entry to measure the processor power > >consumption and power1_acc just needs to be read twice with an needed > >interval in-between. > > > >A simple example: > > > >$ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc > >$ sleep 1s > >$ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc > > > >The result is current average processor power consumption in 1 > >seconds. The unit of the result is uWatt. > > > >Signed-off-by: Huang Rui> >--- > > drivers/hwmon/fam15h_power.c | 62 > > > > 1 file changed, 62 insertions(+) > > > >diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c > >index d529e4b..3bab797 100644 > >--- a/drivers/hwmon/fam15h_power.c > >+++ b/drivers/hwmon/fam15h_power.c > >@@ -60,6 +60,7 @@ struct fam15h_power_data { > > u64 cu_acc_power[MAX_CUS]; > > /* performance timestamp counter */ > > u64 cpu_sw_pwr_ptsc[MAX_CUS]; > >+struct mutex acc_pwr_mutex; > > }; > > > > static ssize_t show_power(struct device *dev, > >@@ -121,17 +122,74 @@ static DEVICE_ATTR(power1_crit, S_IRUGO, > >show_power_crit, NULL); > > static struct attribute_group fam15h_power_group; > > __ATTRIBUTE_GROUPS(fam15h_power); > > > >+static ssize_t show_power_acc(struct device *dev, > >+ struct device_attribute *attr, char *buf) > >+{ > >+int cpu, cu, cu_num, cores_per_cu; > >+u64 curr_cu_acc_power[MAX_CUS], > >+curr_ptsc[MAX_CUS], jdelta[MAX_CUS]; > >+u64 tdelta, avg_acc; > >+struct fam15h_power_data *data = dev_get_drvdata(dev); > >+ > >+cores_per_cu = amd_get_cores_per_cu(); > >+cu_num = boot_cpu_data.x86_max_cores / cores_per_cu; > >+ > >+for (cpu = 0, avg_acc = 0; cpu < cu_num * cores_per_cu; cpu += > >cores_per_cu) { > >+cu = cpu / cores_per_cu; > >+if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, > >_ptsc[cu])) { > >+pr_err("Failed to read PTSC counter MSR on > >core%d\n", > >+ cpu); > >+return 0; > >+} > >+ > >+if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR, > >+ _cu_acc_power[cu])) { > >+pr_err("Failed to read compute unit power > >accumulator MSR on core%d\n", > >+ cpu); > >+return 0; > >+} > >+ > >+if (curr_cu_acc_power[cu] < data->cu_acc_power[cu]) { > >+jdelta[cu] = data->max_cu_acc_power + > >curr_cu_acc_power[cu]; > >+jdelta[cu] -= data->cu_acc_power[cu]; > >+} else { > >+jdelta[cu] = curr_cu_acc_power[cu] - > >data->cu_acc_power[cu]; > >+} > >+tdelta = curr_ptsc[cu] - data->cpu_sw_pwr_ptsc[cu]; > >+jdelta[cu] *= data->cpu_pwr_sample_ratio * 1000; > >+do_div(jdelta[cu], tdelta); > >+ > >+mutex_lock(>acc_pwr_mutex); > >+data->cu_acc_power[cu] = curr_cu_acc_power[cu]; > >+data->cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu]; > >+mutex_unlock(>acc_pwr_mutex); > >+ > >+/* the unit is microWatt */ > >+avg_acc += jdelta[cu]; > >+} > >+ > >+return sprintf(buf, "%u\n", (unsigned int) avg_acc); > >+} > >+static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL); > > I am not really a friend of introducing a non-standard attribute. > Does the energy attribute not work here ? > > >>> > >>>You're right. Non-standard attribute might not be good. Could you > >>>please give me some hints if I use "energy" instead? > >>> > >>1 Joule = 1 Watt-second. > >> > >>Something else, though - did you make sure that your code doesn't
Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm
On 08/30/2015 09:16 PM, Huang Rui wrote: On Fri, Aug 28, 2015 at 07:05:13AM -0700, Guenter Roeck wrote: On 08/28/2015 03:45 AM, Huang Rui wrote: On Thu, Aug 27, 2015 at 10:30:43AM -0700, Guenter Roeck wrote: On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote: This patch introduces an algorithm that computes the average power by reading a delta value of “core power accumulator” register during measurement interval, and then dividing delta value by the length of the time interval. User is able to use power1_acc entry to measure the processor power consumption and power1_acc just needs to be read twice with an needed interval in-between. A simple example: $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc $ sleep 1s $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc The result is current average processor power consumption in 1 seconds. The unit of the result is uWatt. Signed-off-by: Huang Rui --- drivers/hwmon/fam15h_power.c | 62 1 file changed, 62 insertions(+) diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c index d529e4b..3bab797 100644 --- a/drivers/hwmon/fam15h_power.c +++ b/drivers/hwmon/fam15h_power.c @@ -60,6 +60,7 @@ struct fam15h_power_data { u64 cu_acc_power[MAX_CUS]; /* performance timestamp counter */ u64 cpu_sw_pwr_ptsc[MAX_CUS]; + struct mutex acc_pwr_mutex; }; static ssize_t show_power(struct device *dev, @@ -121,17 +122,74 @@ static DEVICE_ATTR(power1_crit, S_IRUGO, show_power_crit, NULL); static struct attribute_group fam15h_power_group; __ATTRIBUTE_GROUPS(fam15h_power); +static ssize_t show_power_acc(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int cpu, cu, cu_num, cores_per_cu; + u64 curr_cu_acc_power[MAX_CUS], + curr_ptsc[MAX_CUS], jdelta[MAX_CUS]; + u64 tdelta, avg_acc; + struct fam15h_power_data *data = dev_get_drvdata(dev); + + cores_per_cu = amd_get_cores_per_cu(); + cu_num = boot_cpu_data.x86_max_cores / cores_per_cu; + + for (cpu = 0, avg_acc = 0; cpu < cu_num * cores_per_cu; cpu += cores_per_cu) { + cu = cpu / cores_per_cu; + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, _ptsc[cu])) { + pr_err("Failed to read PTSC counter MSR on core%d\n", + cpu); + return 0; + } + + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR, + _cu_acc_power[cu])) { + pr_err("Failed to read compute unit power accumulator MSR on core%d\n", + cpu); + return 0; + } + + if (curr_cu_acc_power[cu] < data->cu_acc_power[cu]) { + jdelta[cu] = data->max_cu_acc_power + curr_cu_acc_power[cu]; + jdelta[cu] -= data->cu_acc_power[cu]; + } else { + jdelta[cu] = curr_cu_acc_power[cu] - data->cu_acc_power[cu]; + } + tdelta = curr_ptsc[cu] - data->cpu_sw_pwr_ptsc[cu]; + jdelta[cu] *= data->cpu_pwr_sample_ratio * 1000; + do_div(jdelta[cu], tdelta); + + mutex_lock(>acc_pwr_mutex); + data->cu_acc_power[cu] = curr_cu_acc_power[cu]; + data->cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu]; + mutex_unlock(>acc_pwr_mutex); + + /* the unit is microWatt */ + avg_acc += jdelta[cu]; + } + + return sprintf(buf, "%u\n", (unsigned int) avg_acc); +} +static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL); I am not really a friend of introducing a non-standard attribute. Does the energy attribute not work here ? You're right. Non-standard attribute might not be good. Could you please give me some hints if I use "energy" instead? 1 Joule = 1 Watt-second. Something else, though - did you make sure that your code doesn't overflow ? Even though you calculate the average in an u64, you display it as unsigned. Thanks to your reminder. It should not be overflow. The maximum power consumption of processor (AMD CZ and future 15h) is about 15 Watts = 15,000,000 uWatts = 0xE4E1C0 uWatts, the size is 24 < 32 < 64 bits. Actually, the unit of jdelta is not Joule. Because the tdelta is the loops (cycles) that PTSC counter (the freqency is about 100 MHz) counts not seconds. So avg_acc is the average power consumption not the accumulated energy. Would power1_average then be better suitable for the attribute ? There is also power1_average_interval which could be used to make the interval configurable. Thanks, Guenter -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at
Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm
On Fri, Aug 28, 2015 at 07:05:13AM -0700, Guenter Roeck wrote: > On 08/28/2015 03:45 AM, Huang Rui wrote: > >On Thu, Aug 27, 2015 at 10:30:43AM -0700, Guenter Roeck wrote: > >>On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote: > >>>This patch introduces an algorithm that computes the average power by > >>>reading a delta value of “core power accumulator” register during > >>>measurement interval, and then dividing delta value by the length of > >>>the time interval. > >>> > >>>User is able to use power1_acc entry to measure the processor power > >>>consumption and power1_acc just needs to be read twice with an needed > >>>interval in-between. > >>> > >>>A simple example: > >>> > >>>$ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc > >>>$ sleep 1s > >>>$ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc > >>> > >>>The result is current average processor power consumption in 1 > >>>seconds. The unit of the result is uWatt. > >>> > >>>Signed-off-by: Huang Rui > >>>--- > >>> drivers/hwmon/fam15h_power.c | 62 > >>> > >>> 1 file changed, 62 insertions(+) > >>> > >>>diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c > >>>index d529e4b..3bab797 100644 > >>>--- a/drivers/hwmon/fam15h_power.c > >>>+++ b/drivers/hwmon/fam15h_power.c > >>>@@ -60,6 +60,7 @@ struct fam15h_power_data { > >>> u64 cu_acc_power[MAX_CUS]; > >>> /* performance timestamp counter */ > >>> u64 cpu_sw_pwr_ptsc[MAX_CUS]; > >>>+ struct mutex acc_pwr_mutex; > >>> }; > >>> > >>> static ssize_t show_power(struct device *dev, > >>>@@ -121,17 +122,74 @@ static DEVICE_ATTR(power1_crit, S_IRUGO, > >>>show_power_crit, NULL); > >>> static struct attribute_group fam15h_power_group; > >>> __ATTRIBUTE_GROUPS(fam15h_power); > >>> > >>>+static ssize_t show_power_acc(struct device *dev, > >>>+struct device_attribute *attr, char *buf) > >>>+{ > >>>+ int cpu, cu, cu_num, cores_per_cu; > >>>+ u64 curr_cu_acc_power[MAX_CUS], > >>>+ curr_ptsc[MAX_CUS], jdelta[MAX_CUS]; > >>>+ u64 tdelta, avg_acc; > >>>+ struct fam15h_power_data *data = dev_get_drvdata(dev); > >>>+ > >>>+ cores_per_cu = amd_get_cores_per_cu(); > >>>+ cu_num = boot_cpu_data.x86_max_cores / cores_per_cu; > >>>+ > >>>+ for (cpu = 0, avg_acc = 0; cpu < cu_num * cores_per_cu; cpu += > >>>cores_per_cu) { > >>>+ cu = cpu / cores_per_cu; > >>>+ if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, _ptsc[cu])) { > >>>+ pr_err("Failed to read PTSC counter MSR on core%d\n", > >>>+ cpu); > >>>+ return 0; > >>>+ } > >>>+ > >>>+ if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR, > >>>+ _cu_acc_power[cu])) { > >>>+ pr_err("Failed to read compute unit power accumulator > >>>MSR on core%d\n", > >>>+ cpu); > >>>+ return 0; > >>>+ } > >>>+ > >>>+ if (curr_cu_acc_power[cu] < data->cu_acc_power[cu]) { > >>>+ jdelta[cu] = data->max_cu_acc_power + > >>>curr_cu_acc_power[cu]; > >>>+ jdelta[cu] -= data->cu_acc_power[cu]; > >>>+ } else { > >>>+ jdelta[cu] = curr_cu_acc_power[cu] - > >>>data->cu_acc_power[cu]; > >>>+ } > >>>+ tdelta = curr_ptsc[cu] - data->cpu_sw_pwr_ptsc[cu]; > >>>+ jdelta[cu] *= data->cpu_pwr_sample_ratio * 1000; > >>>+ do_div(jdelta[cu], tdelta); > >>>+ > >>>+ mutex_lock(>acc_pwr_mutex); > >>>+ data->cu_acc_power[cu] = curr_cu_acc_power[cu]; > >>>+ data->cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu]; > >>>+ mutex_unlock(>acc_pwr_mutex); > >>>+ > >>>+ /* the unit is microWatt */ > >>>+ avg_acc += jdelta[cu]; > >>>+ } > >>>+ > >>>+ return sprintf(buf, "%u\n", (unsigned int) avg_acc); > >>>+} > >>>+static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL); > >> > >>I am not really a friend of introducing a non-standard attribute. > >>Does the energy attribute not work here ? > >> > > > >You're right. Non-standard attribute might not be good. Could you > >please give me some hints if I use "energy" instead? > > > 1 Joule = 1 Watt-second. > > Something else, though - did you make sure that your code doesn't overflow ? > Even though you calculate the average in an u64, you display it as unsigned. > Thanks to your reminder. It should not be overflow. The maximum power consumption of processor (AMD CZ and future 15h) is about 15 Watts = 15,000,000 uWatts = 0xE4E1C0 uWatts, the size is 24 < 32 < 64 bits. Actually, the unit of jdelta is not Joule. Because the tdelta is the loops (cycles) that PTSC counter (the freqency is about 100 MHz) counts not seconds. So avg_acc is the average power consumption not the accumulated energy. > 100w * 10,000s = 1,000,000ws = 1,000,000,000,000 micro-watt-seconds, which is > a
Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm
On Fri, Aug 28, 2015 at 07:05:13AM -0700, Guenter Roeck wrote: On 08/28/2015 03:45 AM, Huang Rui wrote: On Thu, Aug 27, 2015 at 10:30:43AM -0700, Guenter Roeck wrote: On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote: This patch introduces an algorithm that computes the average power by reading a delta value of “core power accumulator” register during measurement interval, and then dividing delta value by the length of the time interval. User is able to use power1_acc entry to measure the processor power consumption and power1_acc just needs to be read twice with an needed interval in-between. A simple example: $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc $ sleep 1s $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc The result is current average processor power consumption in 1 seconds. The unit of the result is uWatt. Signed-off-by: Huang Rui ray.hu...@amd.com --- drivers/hwmon/fam15h_power.c | 62 1 file changed, 62 insertions(+) diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c index d529e4b..3bab797 100644 --- a/drivers/hwmon/fam15h_power.c +++ b/drivers/hwmon/fam15h_power.c @@ -60,6 +60,7 @@ struct fam15h_power_data { u64 cu_acc_power[MAX_CUS]; /* performance timestamp counter */ u64 cpu_sw_pwr_ptsc[MAX_CUS]; + struct mutex acc_pwr_mutex; }; static ssize_t show_power(struct device *dev, @@ -121,17 +122,74 @@ static DEVICE_ATTR(power1_crit, S_IRUGO, show_power_crit, NULL); static struct attribute_group fam15h_power_group; __ATTRIBUTE_GROUPS(fam15h_power); +static ssize_t show_power_acc(struct device *dev, +struct device_attribute *attr, char *buf) +{ + int cpu, cu, cu_num, cores_per_cu; + u64 curr_cu_acc_power[MAX_CUS], + curr_ptsc[MAX_CUS], jdelta[MAX_CUS]; + u64 tdelta, avg_acc; + struct fam15h_power_data *data = dev_get_drvdata(dev); + + cores_per_cu = amd_get_cores_per_cu(); + cu_num = boot_cpu_data.x86_max_cores / cores_per_cu; + + for (cpu = 0, avg_acc = 0; cpu cu_num * cores_per_cu; cpu += cores_per_cu) { + cu = cpu / cores_per_cu; + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, curr_ptsc[cu])) { + pr_err(Failed to read PTSC counter MSR on core%d\n, + cpu); + return 0; + } + + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR, + curr_cu_acc_power[cu])) { + pr_err(Failed to read compute unit power accumulator MSR on core%d\n, + cpu); + return 0; + } + + if (curr_cu_acc_power[cu] data-cu_acc_power[cu]) { + jdelta[cu] = data-max_cu_acc_power + curr_cu_acc_power[cu]; + jdelta[cu] -= data-cu_acc_power[cu]; + } else { + jdelta[cu] = curr_cu_acc_power[cu] - data-cu_acc_power[cu]; + } + tdelta = curr_ptsc[cu] - data-cpu_sw_pwr_ptsc[cu]; + jdelta[cu] *= data-cpu_pwr_sample_ratio * 1000; + do_div(jdelta[cu], tdelta); + + mutex_lock(data-acc_pwr_mutex); + data-cu_acc_power[cu] = curr_cu_acc_power[cu]; + data-cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu]; + mutex_unlock(data-acc_pwr_mutex); + + /* the unit is microWatt */ + avg_acc += jdelta[cu]; + } + + return sprintf(buf, %u\n, (unsigned int) avg_acc); +} +static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL); I am not really a friend of introducing a non-standard attribute. Does the energy attribute not work here ? You're right. Non-standard attribute might not be good. Could you please give me some hints if I use energy instead? 1 Joule = 1 Watt-second. Something else, though - did you make sure that your code doesn't overflow ? Even though you calculate the average in an u64, you display it as unsigned. Thanks to your reminder. It should not be overflow. The maximum power consumption of processor (AMD CZ and future 15h) is about 15 Watts = 15,000,000 uWatts = 0xE4E1C0 uWatts, the size is 24 32 64 bits. Actually, the unit of jdelta is not Joule. Because the tdelta is the loops (cycles) that PTSC counter (the freqency is about 100 MHz) counts not seconds. So avg_acc is the average power consumption not the accumulated energy. 100w * 10,000s = 1,000,000ws = 1,000,000,000,000 micro-watt-seconds, which is a bit large for an unsigned. Also, the values should not be reset after reading, but accumulate. Also, I think your code may be vulnerable to overflows on the CPU register side. How long does it take before the CPU counters overflow ? If I use energy, 15w * 10,000s = 150,000,000,000 microWatt-seconds. Yes, it's large for an unsigned, but suitable for u64. The accumulated power of
Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm
On 08/30/2015 09:16 PM, Huang Rui wrote: On Fri, Aug 28, 2015 at 07:05:13AM -0700, Guenter Roeck wrote: On 08/28/2015 03:45 AM, Huang Rui wrote: On Thu, Aug 27, 2015 at 10:30:43AM -0700, Guenter Roeck wrote: On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote: This patch introduces an algorithm that computes the average power by reading a delta value of “core power accumulator” register during measurement interval, and then dividing delta value by the length of the time interval. User is able to use power1_acc entry to measure the processor power consumption and power1_acc just needs to be read twice with an needed interval in-between. A simple example: $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc $ sleep 1s $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc The result is current average processor power consumption in 1 seconds. The unit of the result is uWatt. Signed-off-by: Huang Rui ray.hu...@amd.com --- drivers/hwmon/fam15h_power.c | 62 1 file changed, 62 insertions(+) diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c index d529e4b..3bab797 100644 --- a/drivers/hwmon/fam15h_power.c +++ b/drivers/hwmon/fam15h_power.c @@ -60,6 +60,7 @@ struct fam15h_power_data { u64 cu_acc_power[MAX_CUS]; /* performance timestamp counter */ u64 cpu_sw_pwr_ptsc[MAX_CUS]; + struct mutex acc_pwr_mutex; }; static ssize_t show_power(struct device *dev, @@ -121,17 +122,74 @@ static DEVICE_ATTR(power1_crit, S_IRUGO, show_power_crit, NULL); static struct attribute_group fam15h_power_group; __ATTRIBUTE_GROUPS(fam15h_power); +static ssize_t show_power_acc(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int cpu, cu, cu_num, cores_per_cu; + u64 curr_cu_acc_power[MAX_CUS], + curr_ptsc[MAX_CUS], jdelta[MAX_CUS]; + u64 tdelta, avg_acc; + struct fam15h_power_data *data = dev_get_drvdata(dev); + + cores_per_cu = amd_get_cores_per_cu(); + cu_num = boot_cpu_data.x86_max_cores / cores_per_cu; + + for (cpu = 0, avg_acc = 0; cpu cu_num * cores_per_cu; cpu += cores_per_cu) { + cu = cpu / cores_per_cu; + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, curr_ptsc[cu])) { + pr_err(Failed to read PTSC counter MSR on core%d\n, + cpu); + return 0; + } + + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR, + curr_cu_acc_power[cu])) { + pr_err(Failed to read compute unit power accumulator MSR on core%d\n, + cpu); + return 0; + } + + if (curr_cu_acc_power[cu] data-cu_acc_power[cu]) { + jdelta[cu] = data-max_cu_acc_power + curr_cu_acc_power[cu]; + jdelta[cu] -= data-cu_acc_power[cu]; + } else { + jdelta[cu] = curr_cu_acc_power[cu] - data-cu_acc_power[cu]; + } + tdelta = curr_ptsc[cu] - data-cpu_sw_pwr_ptsc[cu]; + jdelta[cu] *= data-cpu_pwr_sample_ratio * 1000; + do_div(jdelta[cu], tdelta); + + mutex_lock(data-acc_pwr_mutex); + data-cu_acc_power[cu] = curr_cu_acc_power[cu]; + data-cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu]; + mutex_unlock(data-acc_pwr_mutex); + + /* the unit is microWatt */ + avg_acc += jdelta[cu]; + } + + return sprintf(buf, %u\n, (unsigned int) avg_acc); +} +static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL); I am not really a friend of introducing a non-standard attribute. Does the energy attribute not work here ? You're right. Non-standard attribute might not be good. Could you please give me some hints if I use energy instead? 1 Joule = 1 Watt-second. Something else, though - did you make sure that your code doesn't overflow ? Even though you calculate the average in an u64, you display it as unsigned. Thanks to your reminder. It should not be overflow. The maximum power consumption of processor (AMD CZ and future 15h) is about 15 Watts = 15,000,000 uWatts = 0xE4E1C0 uWatts, the size is 24 32 64 bits. Actually, the unit of jdelta is not Joule. Because the tdelta is the loops (cycles) that PTSC counter (the freqency is about 100 MHz) counts not seconds. So avg_acc is the average power consumption not the accumulated energy. Would power1_average then be better suitable for the attribute ? There is also power1_average_interval which could be used to make the interval configurable. Thanks, Guenter -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at
Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm
On 08/28/2015 03:45 AM, Huang Rui wrote: On Thu, Aug 27, 2015 at 10:30:43AM -0700, Guenter Roeck wrote: On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote: This patch introduces an algorithm that computes the average power by reading a delta value of “core power accumulator” register during measurement interval, and then dividing delta value by the length of the time interval. User is able to use power1_acc entry to measure the processor power consumption and power1_acc just needs to be read twice with an needed interval in-between. A simple example: $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc $ sleep 1s $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc The result is current average processor power consumption in 1 seconds. The unit of the result is uWatt. Signed-off-by: Huang Rui --- drivers/hwmon/fam15h_power.c | 62 1 file changed, 62 insertions(+) diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c index d529e4b..3bab797 100644 --- a/drivers/hwmon/fam15h_power.c +++ b/drivers/hwmon/fam15h_power.c @@ -60,6 +60,7 @@ struct fam15h_power_data { u64 cu_acc_power[MAX_CUS]; /* performance timestamp counter */ u64 cpu_sw_pwr_ptsc[MAX_CUS]; + struct mutex acc_pwr_mutex; }; static ssize_t show_power(struct device *dev, @@ -121,17 +122,74 @@ static DEVICE_ATTR(power1_crit, S_IRUGO, show_power_crit, NULL); static struct attribute_group fam15h_power_group; __ATTRIBUTE_GROUPS(fam15h_power); +static ssize_t show_power_acc(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int cpu, cu, cu_num, cores_per_cu; + u64 curr_cu_acc_power[MAX_CUS], + curr_ptsc[MAX_CUS], jdelta[MAX_CUS]; + u64 tdelta, avg_acc; + struct fam15h_power_data *data = dev_get_drvdata(dev); + + cores_per_cu = amd_get_cores_per_cu(); + cu_num = boot_cpu_data.x86_max_cores / cores_per_cu; + + for (cpu = 0, avg_acc = 0; cpu < cu_num * cores_per_cu; cpu += cores_per_cu) { + cu = cpu / cores_per_cu; + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, _ptsc[cu])) { + pr_err("Failed to read PTSC counter MSR on core%d\n", + cpu); + return 0; + } + + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR, + _cu_acc_power[cu])) { + pr_err("Failed to read compute unit power accumulator MSR on core%d\n", + cpu); + return 0; + } + + if (curr_cu_acc_power[cu] < data->cu_acc_power[cu]) { + jdelta[cu] = data->max_cu_acc_power + curr_cu_acc_power[cu]; + jdelta[cu] -= data->cu_acc_power[cu]; + } else { + jdelta[cu] = curr_cu_acc_power[cu] - data->cu_acc_power[cu]; + } + tdelta = curr_ptsc[cu] - data->cpu_sw_pwr_ptsc[cu]; + jdelta[cu] *= data->cpu_pwr_sample_ratio * 1000; + do_div(jdelta[cu], tdelta); + + mutex_lock(>acc_pwr_mutex); + data->cu_acc_power[cu] = curr_cu_acc_power[cu]; + data->cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu]; + mutex_unlock(>acc_pwr_mutex); + + /* the unit is microWatt */ + avg_acc += jdelta[cu]; + } + + return sprintf(buf, "%u\n", (unsigned int) avg_acc); +} +static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL); I am not really a friend of introducing a non-standard attribute. Does the energy attribute not work here ? You're right. Non-standard attribute might not be good. Could you please give me some hints if I use "energy" instead? 1 Joule = 1 Watt-second. Something else, though - did you make sure that your code doesn't overflow ? Even though you calculate the average in an u64, you display it as unsigned. 100w * 10,000s = 1,000,000ws = 1,000,000,000,000 micro-watt-seconds, which is a bit large for an unsigned. Also, the values should not be reset after reading, but accumulate. Also, I think your code may be vulnerable to overflows on the CPU register side. How long does it take before the CPU counters overflow ? Thanks, Guenter -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm
On Thu, Aug 27, 2015 at 10:30:43AM -0700, Guenter Roeck wrote: > On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote: > > This patch introduces an algorithm that computes the average power by > > reading a delta value of “core power accumulator” register during > > measurement interval, and then dividing delta value by the length of > > the time interval. > > > > User is able to use power1_acc entry to measure the processor power > > consumption and power1_acc just needs to be read twice with an needed > > interval in-between. > > > > A simple example: > > > > $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc > > $ sleep 1s > > $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc > > > > The result is current average processor power consumption in 1 > > seconds. The unit of the result is uWatt. > > > > Signed-off-by: Huang Rui > > --- > > drivers/hwmon/fam15h_power.c | 62 > > > > 1 file changed, 62 insertions(+) > > > > diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c > > index d529e4b..3bab797 100644 > > --- a/drivers/hwmon/fam15h_power.c > > +++ b/drivers/hwmon/fam15h_power.c > > @@ -60,6 +60,7 @@ struct fam15h_power_data { > > u64 cu_acc_power[MAX_CUS]; > > /* performance timestamp counter */ > > u64 cpu_sw_pwr_ptsc[MAX_CUS]; > > + struct mutex acc_pwr_mutex; > > }; > > > > static ssize_t show_power(struct device *dev, > > @@ -121,17 +122,74 @@ static DEVICE_ATTR(power1_crit, S_IRUGO, > > show_power_crit, NULL); > > static struct attribute_group fam15h_power_group; > > __ATTRIBUTE_GROUPS(fam15h_power); > > > > +static ssize_t show_power_acc(struct device *dev, > > + struct device_attribute *attr, char *buf) > > +{ > > + int cpu, cu, cu_num, cores_per_cu; > > + u64 curr_cu_acc_power[MAX_CUS], > > + curr_ptsc[MAX_CUS], jdelta[MAX_CUS]; > > + u64 tdelta, avg_acc; > > + struct fam15h_power_data *data = dev_get_drvdata(dev); > > + > > + cores_per_cu = amd_get_cores_per_cu(); > > + cu_num = boot_cpu_data.x86_max_cores / cores_per_cu; > > + > > + for (cpu = 0, avg_acc = 0; cpu < cu_num * cores_per_cu; cpu += > > cores_per_cu) { > > + cu = cpu / cores_per_cu; > > + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, _ptsc[cu])) { > > + pr_err("Failed to read PTSC counter MSR on core%d\n", > > + cpu); > > + return 0; > > + } > > + > > + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR, > > + _cu_acc_power[cu])) { > > + pr_err("Failed to read compute unit power accumulator > > MSR on core%d\n", > > + cpu); > > + return 0; > > + } > > + > > + if (curr_cu_acc_power[cu] < data->cu_acc_power[cu]) { > > + jdelta[cu] = data->max_cu_acc_power + > > curr_cu_acc_power[cu]; > > + jdelta[cu] -= data->cu_acc_power[cu]; > > + } else { > > + jdelta[cu] = curr_cu_acc_power[cu] - > > data->cu_acc_power[cu]; > > + } > > + tdelta = curr_ptsc[cu] - data->cpu_sw_pwr_ptsc[cu]; > > + jdelta[cu] *= data->cpu_pwr_sample_ratio * 1000; > > + do_div(jdelta[cu], tdelta); > > + > > + mutex_lock(>acc_pwr_mutex); > > + data->cu_acc_power[cu] = curr_cu_acc_power[cu]; > > + data->cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu]; > > + mutex_unlock(>acc_pwr_mutex); > > + > > + /* the unit is microWatt */ > > + avg_acc += jdelta[cu]; > > + } > > + > > + return sprintf(buf, "%u\n", (unsigned int) avg_acc); > > +} > > +static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL); > > I am not really a friend of introducing a non-standard attribute. > Does the energy attribute not work here ? > You're right. Non-standard attribute might not be good. Could you please give me some hints if I use "energy" instead? Thanks, Rui -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm
On Thu, Aug 27, 2015 at 10:30:43AM -0700, Guenter Roeck wrote: On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote: This patch introduces an algorithm that computes the average power by reading a delta value of “core power accumulator” register during measurement interval, and then dividing delta value by the length of the time interval. User is able to use power1_acc entry to measure the processor power consumption and power1_acc just needs to be read twice with an needed interval in-between. A simple example: $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc $ sleep 1s $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc The result is current average processor power consumption in 1 seconds. The unit of the result is uWatt. Signed-off-by: Huang Rui ray.hu...@amd.com --- drivers/hwmon/fam15h_power.c | 62 1 file changed, 62 insertions(+) diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c index d529e4b..3bab797 100644 --- a/drivers/hwmon/fam15h_power.c +++ b/drivers/hwmon/fam15h_power.c @@ -60,6 +60,7 @@ struct fam15h_power_data { u64 cu_acc_power[MAX_CUS]; /* performance timestamp counter */ u64 cpu_sw_pwr_ptsc[MAX_CUS]; + struct mutex acc_pwr_mutex; }; static ssize_t show_power(struct device *dev, @@ -121,17 +122,74 @@ static DEVICE_ATTR(power1_crit, S_IRUGO, show_power_crit, NULL); static struct attribute_group fam15h_power_group; __ATTRIBUTE_GROUPS(fam15h_power); +static ssize_t show_power_acc(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int cpu, cu, cu_num, cores_per_cu; + u64 curr_cu_acc_power[MAX_CUS], + curr_ptsc[MAX_CUS], jdelta[MAX_CUS]; + u64 tdelta, avg_acc; + struct fam15h_power_data *data = dev_get_drvdata(dev); + + cores_per_cu = amd_get_cores_per_cu(); + cu_num = boot_cpu_data.x86_max_cores / cores_per_cu; + + for (cpu = 0, avg_acc = 0; cpu cu_num * cores_per_cu; cpu += cores_per_cu) { + cu = cpu / cores_per_cu; + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, curr_ptsc[cu])) { + pr_err(Failed to read PTSC counter MSR on core%d\n, + cpu); + return 0; + } + + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR, + curr_cu_acc_power[cu])) { + pr_err(Failed to read compute unit power accumulator MSR on core%d\n, + cpu); + return 0; + } + + if (curr_cu_acc_power[cu] data-cu_acc_power[cu]) { + jdelta[cu] = data-max_cu_acc_power + curr_cu_acc_power[cu]; + jdelta[cu] -= data-cu_acc_power[cu]; + } else { + jdelta[cu] = curr_cu_acc_power[cu] - data-cu_acc_power[cu]; + } + tdelta = curr_ptsc[cu] - data-cpu_sw_pwr_ptsc[cu]; + jdelta[cu] *= data-cpu_pwr_sample_ratio * 1000; + do_div(jdelta[cu], tdelta); + + mutex_lock(data-acc_pwr_mutex); + data-cu_acc_power[cu] = curr_cu_acc_power[cu]; + data-cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu]; + mutex_unlock(data-acc_pwr_mutex); + + /* the unit is microWatt */ + avg_acc += jdelta[cu]; + } + + return sprintf(buf, %u\n, (unsigned int) avg_acc); +} +static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL); I am not really a friend of introducing a non-standard attribute. Does the energy attribute not work here ? You're right. Non-standard attribute might not be good. Could you please give me some hints if I use energy instead? Thanks, Rui -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm
On 08/28/2015 03:45 AM, Huang Rui wrote: On Thu, Aug 27, 2015 at 10:30:43AM -0700, Guenter Roeck wrote: On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote: This patch introduces an algorithm that computes the average power by reading a delta value of “core power accumulator” register during measurement interval, and then dividing delta value by the length of the time interval. User is able to use power1_acc entry to measure the processor power consumption and power1_acc just needs to be read twice with an needed interval in-between. A simple example: $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc $ sleep 1s $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc The result is current average processor power consumption in 1 seconds. The unit of the result is uWatt. Signed-off-by: Huang Rui ray.hu...@amd.com --- drivers/hwmon/fam15h_power.c | 62 1 file changed, 62 insertions(+) diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c index d529e4b..3bab797 100644 --- a/drivers/hwmon/fam15h_power.c +++ b/drivers/hwmon/fam15h_power.c @@ -60,6 +60,7 @@ struct fam15h_power_data { u64 cu_acc_power[MAX_CUS]; /* performance timestamp counter */ u64 cpu_sw_pwr_ptsc[MAX_CUS]; + struct mutex acc_pwr_mutex; }; static ssize_t show_power(struct device *dev, @@ -121,17 +122,74 @@ static DEVICE_ATTR(power1_crit, S_IRUGO, show_power_crit, NULL); static struct attribute_group fam15h_power_group; __ATTRIBUTE_GROUPS(fam15h_power); +static ssize_t show_power_acc(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int cpu, cu, cu_num, cores_per_cu; + u64 curr_cu_acc_power[MAX_CUS], + curr_ptsc[MAX_CUS], jdelta[MAX_CUS]; + u64 tdelta, avg_acc; + struct fam15h_power_data *data = dev_get_drvdata(dev); + + cores_per_cu = amd_get_cores_per_cu(); + cu_num = boot_cpu_data.x86_max_cores / cores_per_cu; + + for (cpu = 0, avg_acc = 0; cpu cu_num * cores_per_cu; cpu += cores_per_cu) { + cu = cpu / cores_per_cu; + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, curr_ptsc[cu])) { + pr_err(Failed to read PTSC counter MSR on core%d\n, + cpu); + return 0; + } + + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR, + curr_cu_acc_power[cu])) { + pr_err(Failed to read compute unit power accumulator MSR on core%d\n, + cpu); + return 0; + } + + if (curr_cu_acc_power[cu] data-cu_acc_power[cu]) { + jdelta[cu] = data-max_cu_acc_power + curr_cu_acc_power[cu]; + jdelta[cu] -= data-cu_acc_power[cu]; + } else { + jdelta[cu] = curr_cu_acc_power[cu] - data-cu_acc_power[cu]; + } + tdelta = curr_ptsc[cu] - data-cpu_sw_pwr_ptsc[cu]; + jdelta[cu] *= data-cpu_pwr_sample_ratio * 1000; + do_div(jdelta[cu], tdelta); + + mutex_lock(data-acc_pwr_mutex); + data-cu_acc_power[cu] = curr_cu_acc_power[cu]; + data-cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu]; + mutex_unlock(data-acc_pwr_mutex); + + /* the unit is microWatt */ + avg_acc += jdelta[cu]; + } + + return sprintf(buf, %u\n, (unsigned int) avg_acc); +} +static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL); I am not really a friend of introducing a non-standard attribute. Does the energy attribute not work here ? You're right. Non-standard attribute might not be good. Could you please give me some hints if I use energy instead? 1 Joule = 1 Watt-second. Something else, though - did you make sure that your code doesn't overflow ? Even though you calculate the average in an u64, you display it as unsigned. 100w * 10,000s = 1,000,000ws = 1,000,000,000,000 micro-watt-seconds, which is a bit large for an unsigned. Also, the values should not be reset after reading, but accumulate. Also, I think your code may be vulnerable to overflows on the CPU register side. How long does it take before the CPU counters overflow ? Thanks, Guenter -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm
On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote: > This patch introduces an algorithm that computes the average power by > reading a delta value of “core power accumulator” register during > measurement interval, and then dividing delta value by the length of > the time interval. > > User is able to use power1_acc entry to measure the processor power > consumption and power1_acc just needs to be read twice with an needed > interval in-between. > > A simple example: > > $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc > $ sleep 1s > $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc > > The result is current average processor power consumption in 1 > seconds. The unit of the result is uWatt. > > Signed-off-by: Huang Rui > --- > drivers/hwmon/fam15h_power.c | 62 > > 1 file changed, 62 insertions(+) > > diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c > index d529e4b..3bab797 100644 > --- a/drivers/hwmon/fam15h_power.c > +++ b/drivers/hwmon/fam15h_power.c > @@ -60,6 +60,7 @@ struct fam15h_power_data { > u64 cu_acc_power[MAX_CUS]; > /* performance timestamp counter */ > u64 cpu_sw_pwr_ptsc[MAX_CUS]; > + struct mutex acc_pwr_mutex; > }; > > static ssize_t show_power(struct device *dev, > @@ -121,17 +122,74 @@ static DEVICE_ATTR(power1_crit, S_IRUGO, > show_power_crit, NULL); > static struct attribute_group fam15h_power_group; > __ATTRIBUTE_GROUPS(fam15h_power); > > +static ssize_t show_power_acc(struct device *dev, > + struct device_attribute *attr, char *buf) > +{ > + int cpu, cu, cu_num, cores_per_cu; > + u64 curr_cu_acc_power[MAX_CUS], > + curr_ptsc[MAX_CUS], jdelta[MAX_CUS]; > + u64 tdelta, avg_acc; > + struct fam15h_power_data *data = dev_get_drvdata(dev); > + > + cores_per_cu = amd_get_cores_per_cu(); > + cu_num = boot_cpu_data.x86_max_cores / cores_per_cu; > + > + for (cpu = 0, avg_acc = 0; cpu < cu_num * cores_per_cu; cpu += > cores_per_cu) { > + cu = cpu / cores_per_cu; > + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, _ptsc[cu])) { > + pr_err("Failed to read PTSC counter MSR on core%d\n", > +cpu); > + return 0; > + } > + > + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR, > +_cu_acc_power[cu])) { > + pr_err("Failed to read compute unit power accumulator > MSR on core%d\n", > +cpu); > + return 0; > + } > + > + if (curr_cu_acc_power[cu] < data->cu_acc_power[cu]) { > + jdelta[cu] = data->max_cu_acc_power + > curr_cu_acc_power[cu]; > + jdelta[cu] -= data->cu_acc_power[cu]; > + } else { > + jdelta[cu] = curr_cu_acc_power[cu] - > data->cu_acc_power[cu]; > + } > + tdelta = curr_ptsc[cu] - data->cpu_sw_pwr_ptsc[cu]; > + jdelta[cu] *= data->cpu_pwr_sample_ratio * 1000; > + do_div(jdelta[cu], tdelta); > + > + mutex_lock(>acc_pwr_mutex); > + data->cu_acc_power[cu] = curr_cu_acc_power[cu]; > + data->cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu]; > + mutex_unlock(>acc_pwr_mutex); > + > + /* the unit is microWatt */ > + avg_acc += jdelta[cu]; > + } > + > + return sprintf(buf, "%u\n", (unsigned int) avg_acc); > +} > +static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL); I am not really a friend of introducing a non-standard attribute. Does the energy attribute not work here ? Thanks, Guenter -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm
This patch introduces an algorithm that computes the average power by reading a delta value of “core power accumulator” register during measurement interval, and then dividing delta value by the length of the time interval. User is able to use power1_acc entry to measure the processor power consumption and power1_acc just needs to be read twice with an needed interval in-between. A simple example: $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc $ sleep 1s $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc The result is current average processor power consumption in 1 seconds. The unit of the result is uWatt. Signed-off-by: Huang Rui --- drivers/hwmon/fam15h_power.c | 62 1 file changed, 62 insertions(+) diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c index d529e4b..3bab797 100644 --- a/drivers/hwmon/fam15h_power.c +++ b/drivers/hwmon/fam15h_power.c @@ -60,6 +60,7 @@ struct fam15h_power_data { u64 cu_acc_power[MAX_CUS]; /* performance timestamp counter */ u64 cpu_sw_pwr_ptsc[MAX_CUS]; + struct mutex acc_pwr_mutex; }; static ssize_t show_power(struct device *dev, @@ -121,17 +122,74 @@ static DEVICE_ATTR(power1_crit, S_IRUGO, show_power_crit, NULL); static struct attribute_group fam15h_power_group; __ATTRIBUTE_GROUPS(fam15h_power); +static ssize_t show_power_acc(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int cpu, cu, cu_num, cores_per_cu; + u64 curr_cu_acc_power[MAX_CUS], + curr_ptsc[MAX_CUS], jdelta[MAX_CUS]; + u64 tdelta, avg_acc; + struct fam15h_power_data *data = dev_get_drvdata(dev); + + cores_per_cu = amd_get_cores_per_cu(); + cu_num = boot_cpu_data.x86_max_cores / cores_per_cu; + + for (cpu = 0, avg_acc = 0; cpu < cu_num * cores_per_cu; cpu += cores_per_cu) { + cu = cpu / cores_per_cu; + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, _ptsc[cu])) { + pr_err("Failed to read PTSC counter MSR on core%d\n", + cpu); + return 0; + } + + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR, + _cu_acc_power[cu])) { + pr_err("Failed to read compute unit power accumulator MSR on core%d\n", + cpu); + return 0; + } + + if (curr_cu_acc_power[cu] < data->cu_acc_power[cu]) { + jdelta[cu] = data->max_cu_acc_power + curr_cu_acc_power[cu]; + jdelta[cu] -= data->cu_acc_power[cu]; + } else { + jdelta[cu] = curr_cu_acc_power[cu] - data->cu_acc_power[cu]; + } + tdelta = curr_ptsc[cu] - data->cpu_sw_pwr_ptsc[cu]; + jdelta[cu] *= data->cpu_pwr_sample_ratio * 1000; + do_div(jdelta[cu], tdelta); + + mutex_lock(>acc_pwr_mutex); + data->cu_acc_power[cu] = curr_cu_acc_power[cu]; + data->cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu]; + mutex_unlock(>acc_pwr_mutex); + + /* the unit is microWatt */ + avg_acc += jdelta[cu]; + } + + return sprintf(buf, "%u\n", (unsigned int) avg_acc); +} +static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL); + static int fam15h_power_init_attrs(struct pci_dev *pdev) { int n = FAM15H_MIN_POWER_GROUPS; struct attribute **fam15h_power_attrs; struct cpuinfo_x86 *c = _cpu_data; + u32 cpuid; if (c->x86 == 0x15 && ((c->x86_model <= 0xf) || (c->x86_model >= 0x60 && c->x86_model <= 0x6f))) n += 1; + cpuid = cpuid_edx(0x8007); + + /* check if processor supports accumulated power */ + if (cpuid & BIT(12)) + n += 1; + fam15h_power_attrs = devm_kcalloc(>dev, n, sizeof(*fam15h_power_attrs), GFP_KERNEL); @@ -148,6 +206,9 @@ static int fam15h_power_init_attrs(struct pci_dev *pdev) (c->x86_model >= 0x60 && c->x86_model <= 0x6f))) fam15h_power_attrs[n++] = _attr_power1_input.attr; + if (cpuid & BIT(12)) + fam15h_power_attrs[n++] = _attr_power1_acc.attr; + fam15h_power_group.attrs = fam15h_power_attrs; return 0; @@ -311,6 +372,7 @@ static int fam15h_power_probe(struct pci_dev *pdev, if (ret) return ret; + mutex_init(>acc_pwr_mutex); data->pdev = pdev; hwmon_dev = devm_hwmon_device_register_with_groups(dev, "fam15h_power", -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to
Re: [PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm
On Thu, Aug 27, 2015 at 04:07:43PM +0800, Huang Rui wrote: This patch introduces an algorithm that computes the average power by reading a delta value of “core power accumulator” register during measurement interval, and then dividing delta value by the length of the time interval. User is able to use power1_acc entry to measure the processor power consumption and power1_acc just needs to be read twice with an needed interval in-between. A simple example: $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc $ sleep 1s $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc The result is current average processor power consumption in 1 seconds. The unit of the result is uWatt. Signed-off-by: Huang Rui ray.hu...@amd.com --- drivers/hwmon/fam15h_power.c | 62 1 file changed, 62 insertions(+) diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c index d529e4b..3bab797 100644 --- a/drivers/hwmon/fam15h_power.c +++ b/drivers/hwmon/fam15h_power.c @@ -60,6 +60,7 @@ struct fam15h_power_data { u64 cu_acc_power[MAX_CUS]; /* performance timestamp counter */ u64 cpu_sw_pwr_ptsc[MAX_CUS]; + struct mutex acc_pwr_mutex; }; static ssize_t show_power(struct device *dev, @@ -121,17 +122,74 @@ static DEVICE_ATTR(power1_crit, S_IRUGO, show_power_crit, NULL); static struct attribute_group fam15h_power_group; __ATTRIBUTE_GROUPS(fam15h_power); +static ssize_t show_power_acc(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int cpu, cu, cu_num, cores_per_cu; + u64 curr_cu_acc_power[MAX_CUS], + curr_ptsc[MAX_CUS], jdelta[MAX_CUS]; + u64 tdelta, avg_acc; + struct fam15h_power_data *data = dev_get_drvdata(dev); + + cores_per_cu = amd_get_cores_per_cu(); + cu_num = boot_cpu_data.x86_max_cores / cores_per_cu; + + for (cpu = 0, avg_acc = 0; cpu cu_num * cores_per_cu; cpu += cores_per_cu) { + cu = cpu / cores_per_cu; + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, curr_ptsc[cu])) { + pr_err(Failed to read PTSC counter MSR on core%d\n, +cpu); + return 0; + } + + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR, +curr_cu_acc_power[cu])) { + pr_err(Failed to read compute unit power accumulator MSR on core%d\n, +cpu); + return 0; + } + + if (curr_cu_acc_power[cu] data-cu_acc_power[cu]) { + jdelta[cu] = data-max_cu_acc_power + curr_cu_acc_power[cu]; + jdelta[cu] -= data-cu_acc_power[cu]; + } else { + jdelta[cu] = curr_cu_acc_power[cu] - data-cu_acc_power[cu]; + } + tdelta = curr_ptsc[cu] - data-cpu_sw_pwr_ptsc[cu]; + jdelta[cu] *= data-cpu_pwr_sample_ratio * 1000; + do_div(jdelta[cu], tdelta); + + mutex_lock(data-acc_pwr_mutex); + data-cu_acc_power[cu] = curr_cu_acc_power[cu]; + data-cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu]; + mutex_unlock(data-acc_pwr_mutex); + + /* the unit is microWatt */ + avg_acc += jdelta[cu]; + } + + return sprintf(buf, %u\n, (unsigned int) avg_acc); +} +static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL); I am not really a friend of introducing a non-standard attribute. Does the energy attribute not work here ? Thanks, Guenter -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 12/15] hwmon, fam15h_power: introduce a cpu accumulated power reporting algorithm
This patch introduces an algorithm that computes the average power by reading a delta value of “core power accumulator” register during measurement interval, and then dividing delta value by the length of the time interval. User is able to use power1_acc entry to measure the processor power consumption and power1_acc just needs to be read twice with an needed interval in-between. A simple example: $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc $ sleep 1s $ cat /sys/bus/pci/devices/\:00\:18.4/hwmon/hwmon0/power1_acc The result is current average processor power consumption in 1 seconds. The unit of the result is uWatt. Signed-off-by: Huang Rui ray.hu...@amd.com --- drivers/hwmon/fam15h_power.c | 62 1 file changed, 62 insertions(+) diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c index d529e4b..3bab797 100644 --- a/drivers/hwmon/fam15h_power.c +++ b/drivers/hwmon/fam15h_power.c @@ -60,6 +60,7 @@ struct fam15h_power_data { u64 cu_acc_power[MAX_CUS]; /* performance timestamp counter */ u64 cpu_sw_pwr_ptsc[MAX_CUS]; + struct mutex acc_pwr_mutex; }; static ssize_t show_power(struct device *dev, @@ -121,17 +122,74 @@ static DEVICE_ATTR(power1_crit, S_IRUGO, show_power_crit, NULL); static struct attribute_group fam15h_power_group; __ATTRIBUTE_GROUPS(fam15h_power); +static ssize_t show_power_acc(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int cpu, cu, cu_num, cores_per_cu; + u64 curr_cu_acc_power[MAX_CUS], + curr_ptsc[MAX_CUS], jdelta[MAX_CUS]; + u64 tdelta, avg_acc; + struct fam15h_power_data *data = dev_get_drvdata(dev); + + cores_per_cu = amd_get_cores_per_cu(); + cu_num = boot_cpu_data.x86_max_cores / cores_per_cu; + + for (cpu = 0, avg_acc = 0; cpu cu_num * cores_per_cu; cpu += cores_per_cu) { + cu = cpu / cores_per_cu; + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_PTSC, curr_ptsc[cu])) { + pr_err(Failed to read PTSC counter MSR on core%d\n, + cpu); + return 0; + } + + if (rdmsrl_safe_on_cpu(cpu, MSR_F15H_CU_PWR_ACCUMULATOR, + curr_cu_acc_power[cu])) { + pr_err(Failed to read compute unit power accumulator MSR on core%d\n, + cpu); + return 0; + } + + if (curr_cu_acc_power[cu] data-cu_acc_power[cu]) { + jdelta[cu] = data-max_cu_acc_power + curr_cu_acc_power[cu]; + jdelta[cu] -= data-cu_acc_power[cu]; + } else { + jdelta[cu] = curr_cu_acc_power[cu] - data-cu_acc_power[cu]; + } + tdelta = curr_ptsc[cu] - data-cpu_sw_pwr_ptsc[cu]; + jdelta[cu] *= data-cpu_pwr_sample_ratio * 1000; + do_div(jdelta[cu], tdelta); + + mutex_lock(data-acc_pwr_mutex); + data-cu_acc_power[cu] = curr_cu_acc_power[cu]; + data-cpu_sw_pwr_ptsc[cu] = curr_ptsc[cu]; + mutex_unlock(data-acc_pwr_mutex); + + /* the unit is microWatt */ + avg_acc += jdelta[cu]; + } + + return sprintf(buf, %u\n, (unsigned int) avg_acc); +} +static DEVICE_ATTR(power1_acc, S_IRUGO, show_power_acc, NULL); + static int fam15h_power_init_attrs(struct pci_dev *pdev) { int n = FAM15H_MIN_POWER_GROUPS; struct attribute **fam15h_power_attrs; struct cpuinfo_x86 *c = boot_cpu_data; + u32 cpuid; if (c-x86 == 0x15 ((c-x86_model = 0xf) || (c-x86_model = 0x60 c-x86_model = 0x6f))) n += 1; + cpuid = cpuid_edx(0x8007); + + /* check if processor supports accumulated power */ + if (cpuid BIT(12)) + n += 1; + fam15h_power_attrs = devm_kcalloc(pdev-dev, n, sizeof(*fam15h_power_attrs), GFP_KERNEL); @@ -148,6 +206,9 @@ static int fam15h_power_init_attrs(struct pci_dev *pdev) (c-x86_model = 0x60 c-x86_model = 0x6f))) fam15h_power_attrs[n++] = dev_attr_power1_input.attr; + if (cpuid BIT(12)) + fam15h_power_attrs[n++] = dev_attr_power1_acc.attr; + fam15h_power_group.attrs = fam15h_power_attrs; return 0; @@ -311,6 +372,7 @@ static int fam15h_power_probe(struct pci_dev *pdev, if (ret) return ret; + mutex_init(data-acc_pwr_mutex); data-pdev = pdev; hwmon_dev = devm_hwmon_device_register_with_groups(dev, fam15h_power, -- 1.9.1 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to