Re: [Nouveau] [RFC PATCH 01/29] therm: split return code and value in nvkm_get_temp

2017-10-08 Thread Karol Herbst
On Sun, Oct 8, 2017 at 12:16 PM, Pierre Moreau  wrote:
> As you changed the return value of `temp_get()` to solely be the error code, 
> or
> absence of an error, I would change all those tests that checked whether the
> returned value was strictly less, or greater than, 0 to now only compare
> against 0 (no error). For example,
>
>   if (therm && therm->attr_get && nvkm_therm_temp_get(therm, ) < 0)
>   if (therm->func->temp_get(therm, ) >= 0)
>
> could become
>
>   if (therm && therm->attr_get && nvkm_therm_temp_get(therm, ))
>   if (!therm->func->temp_get(therm, ))
>

right, makes sense.

> to match other error checking code.
>
> More comments below
>
> On 2017-09-15 — 17:11, Karol Herbst wrote:
>> The current hwmon code doesn't check if the returned value was actually an
>> error.
>>
>> Since Kepler temperature sensors are able to report negative values.
>
> I assume those negative values are not for error reporting, but more if you
> buried your GPU in the snow somewhere in the Antarctic and still want a valid
> temperature to be reported.

well, nobody ever tested that, but at least on Kepler technically we
have two new registers related to therm, one offset and one value. If
the offset is 0x60, value needs to be substracted by that offset to
get the real temperature. No idea if that works for real though, but
why would they add it anyhow if not?

>
>> Since Pascal (and maybe earlier) we have sensors with improved precision.
>>
>> Adjust the nvkm_get_temp method to be able to deal with those changes
>> and let hwmon return an error properly.
>>
>> Signed-off-by: Karol Herbst 
>> ---
>>  drm/nouveau/include/nvkm/subdev/therm.h |  2 +-
>>  drm/nouveau/nouveau_hwmon.c | 15 +--
>>  drm/nouveau/nvkm/subdev/therm/base.c| 19 ++-
>>  drm/nouveau/nvkm/subdev/therm/g84.c | 11 ++-
>>  drm/nouveau/nvkm/subdev/therm/gp100.c   |  9 +
>>  drm/nouveau/nvkm/subdev/therm/nv40.c|  9 +++--
>>  drm/nouveau/nvkm/subdev/therm/nv50.c|  9 +++--
>>  drm/nouveau/nvkm/subdev/therm/priv.h|  4 ++--
>>  drm/nouveau/nvkm/subdev/therm/temp.c| 16 
>>  9 files changed, 55 insertions(+), 39 deletions(-)
>>
>> diff --git a/drm/nouveau/include/nvkm/subdev/therm.h 
>> b/drm/nouveau/include/nvkm/subdev/therm.h
>> index 9841f076..8c84017f 100644
>> --- a/drm/nouveau/include/nvkm/subdev/therm.h
>> +++ b/drm/nouveau/include/nvkm/subdev/therm.h
>> @@ -86,7 +86,7 @@ struct nvkm_therm {
>>   int (*attr_set)(struct nvkm_therm *, enum nvkm_therm_attr_type, int);
>>  };
>>
>> -int nvkm_therm_temp_get(struct nvkm_therm *);
>> +int nvkm_therm_temp_get(struct nvkm_therm *, int *);
>>  int nvkm_therm_fan_sense(struct nvkm_therm *);
>>  int nvkm_therm_cstate(struct nvkm_therm *, int, int);
>>
>> diff --git a/drm/nouveau/nouveau_hwmon.c b/drm/nouveau/nouveau_hwmon.c
>> index 7c965648..f1914d9a 100644
>> --- a/drm/nouveau/nouveau_hwmon.c
>> +++ b/drm/nouveau/nouveau_hwmon.c
>> @@ -326,8 +326,9 @@ nouveau_temp_is_visible(const void *data, u32 attr, int 
>> channel)
>>  {
>>   struct nouveau_drm *drm = nouveau_drm((struct drm_device *)data);
>>   struct nvkm_therm *therm = nvxx_therm(>client.device);
>> + int val;
>>
>> - if (therm && therm->attr_get && nvkm_therm_temp_get(therm) < 0)
>> + if (therm && therm->attr_get && nvkm_therm_temp_get(therm, ) < 0)
>>   return 0;
>>
>>   switch (attr) {
>> @@ -421,15 +422,16 @@ nouveau_temp_read(struct device *dev, u32 attr, int 
>> channel, long *val)
>>   struct drm_device *drm_dev = dev_get_drvdata(dev);
>>   struct nouveau_drm *drm = nouveau_drm(drm_dev);
>>   struct nvkm_therm *therm = nvxx_therm(>client.device);
>> - int ret;
>> + int ret = 0;
>> + int temp;
>>
>>   if (!therm || !therm->attr_get)
>>   return -EOPNOTSUPP;
>>
>>   switch (attr) {
>>   case hwmon_temp_input:
>> - ret = nvkm_therm_temp_get(therm);
>> - *val = ret < 0 ? ret : (ret * 1000);
>> + ret = nvkm_therm_temp_get(therm, );
>> + *val = temp * 1000;
>>   break;
>>   case hwmon_temp_max:
>>   *val = therm->attr_get(therm, NVKM_THERM_ATTR_THRS_DOWN_CLK)
>> @@ -459,7 +461,7 @@ nouveau_temp_read(struct device *dev, u32 attr, int 
>> channel, long *val)
>>   return -EOPNOTSUPP;
>>   }
>>
>> - return 0;
>> + return ret;
>>  }
>>
>>  static int
>> @@ -713,6 +715,7 @@ nouveau_hwmon_init(struct drm_device *dev)
>>   struct device *hwmon_dev;
>>   int ret = 0;
>>   int i = 0;
>> + int val;
>>
>>   hwmon = drm->hwmon = kzalloc(sizeof(*hwmon), GFP_KERNEL);
>>   if (!hwmon)
>> @@ -720,7 +723,7 @@ nouveau_hwmon_init(struct drm_device *dev)
>>   hwmon->dev = dev;
>>
>>   if (therm && therm->attr_get && therm->attr_set) {
>> - if (nvkm_therm_temp_get(therm) >= 0)
>> + if 

Re: [Nouveau] [RFC PATCH 01/29] therm: split return code and value in nvkm_get_temp

2017-10-08 Thread Pierre Moreau
As you changed the return value of `temp_get()` to solely be the error code, or
absence of an error, I would change all those tests that checked whether the
returned value was strictly less, or greater than, 0 to now only compare
against 0 (no error). For example,

  if (therm && therm->attr_get && nvkm_therm_temp_get(therm, ) < 0)
  if (therm->func->temp_get(therm, ) >= 0)

could become

  if (therm && therm->attr_get && nvkm_therm_temp_get(therm, ))
  if (!therm->func->temp_get(therm, ))

to match other error checking code.

More comments below

On 2017-09-15 — 17:11, Karol Herbst wrote:
> The current hwmon code doesn't check if the returned value was actually an
> error.
> 
> Since Kepler temperature sensors are able to report negative values.

I assume those negative values are not for error reporting, but more if you
buried your GPU in the snow somewhere in the Antarctic and still want a valid
temperature to be reported.

> Since Pascal (and maybe earlier) we have sensors with improved precision.
> 
> Adjust the nvkm_get_temp method to be able to deal with those changes
> and let hwmon return an error properly.
> 
> Signed-off-by: Karol Herbst 
> ---
>  drm/nouveau/include/nvkm/subdev/therm.h |  2 +-
>  drm/nouveau/nouveau_hwmon.c | 15 +--
>  drm/nouveau/nvkm/subdev/therm/base.c| 19 ++-
>  drm/nouveau/nvkm/subdev/therm/g84.c | 11 ++-
>  drm/nouveau/nvkm/subdev/therm/gp100.c   |  9 +
>  drm/nouveau/nvkm/subdev/therm/nv40.c|  9 +++--
>  drm/nouveau/nvkm/subdev/therm/nv50.c|  9 +++--
>  drm/nouveau/nvkm/subdev/therm/priv.h|  4 ++--
>  drm/nouveau/nvkm/subdev/therm/temp.c| 16 
>  9 files changed, 55 insertions(+), 39 deletions(-)
> 
> diff --git a/drm/nouveau/include/nvkm/subdev/therm.h 
> b/drm/nouveau/include/nvkm/subdev/therm.h
> index 9841f076..8c84017f 100644
> --- a/drm/nouveau/include/nvkm/subdev/therm.h
> +++ b/drm/nouveau/include/nvkm/subdev/therm.h
> @@ -86,7 +86,7 @@ struct nvkm_therm {
>   int (*attr_set)(struct nvkm_therm *, enum nvkm_therm_attr_type, int);
>  };
>  
> -int nvkm_therm_temp_get(struct nvkm_therm *);
> +int nvkm_therm_temp_get(struct nvkm_therm *, int *);
>  int nvkm_therm_fan_sense(struct nvkm_therm *);
>  int nvkm_therm_cstate(struct nvkm_therm *, int, int);
>  
> diff --git a/drm/nouveau/nouveau_hwmon.c b/drm/nouveau/nouveau_hwmon.c
> index 7c965648..f1914d9a 100644
> --- a/drm/nouveau/nouveau_hwmon.c
> +++ b/drm/nouveau/nouveau_hwmon.c
> @@ -326,8 +326,9 @@ nouveau_temp_is_visible(const void *data, u32 attr, int 
> channel)
>  {
>   struct nouveau_drm *drm = nouveau_drm((struct drm_device *)data);
>   struct nvkm_therm *therm = nvxx_therm(>client.device);
> + int val;
>  
> - if (therm && therm->attr_get && nvkm_therm_temp_get(therm) < 0)
> + if (therm && therm->attr_get && nvkm_therm_temp_get(therm, ) < 0)
>   return 0;
>  
>   switch (attr) {
> @@ -421,15 +422,16 @@ nouveau_temp_read(struct device *dev, u32 attr, int 
> channel, long *val)
>   struct drm_device *drm_dev = dev_get_drvdata(dev);
>   struct nouveau_drm *drm = nouveau_drm(drm_dev);
>   struct nvkm_therm *therm = nvxx_therm(>client.device);
> - int ret;
> + int ret = 0;
> + int temp;
>  
>   if (!therm || !therm->attr_get)
>   return -EOPNOTSUPP;
>  
>   switch (attr) {
>   case hwmon_temp_input:
> - ret = nvkm_therm_temp_get(therm);
> - *val = ret < 0 ? ret : (ret * 1000);
> + ret = nvkm_therm_temp_get(therm, );
> + *val = temp * 1000;
>   break;
>   case hwmon_temp_max:
>   *val = therm->attr_get(therm, NVKM_THERM_ATTR_THRS_DOWN_CLK)
> @@ -459,7 +461,7 @@ nouveau_temp_read(struct device *dev, u32 attr, int 
> channel, long *val)
>   return -EOPNOTSUPP;
>   }
>  
> - return 0;
> + return ret;
>  }
>  
>  static int
> @@ -713,6 +715,7 @@ nouveau_hwmon_init(struct drm_device *dev)
>   struct device *hwmon_dev;
>   int ret = 0;
>   int i = 0;
> + int val;
>  
>   hwmon = drm->hwmon = kzalloc(sizeof(*hwmon), GFP_KERNEL);
>   if (!hwmon)
> @@ -720,7 +723,7 @@ nouveau_hwmon_init(struct drm_device *dev)
>   hwmon->dev = dev;
>  
>   if (therm && therm->attr_get && therm->attr_set) {
> - if (nvkm_therm_temp_get(therm) >= 0)
> + if (nvkm_therm_temp_get(therm, ) >= 0)
>   special_groups[i++] = _auto_point_sensor_group;
>   if (therm->fan_get && therm->fan_get(therm) >= 0)
>   special_groups[i++] = _fan_sensor_group;
> diff --git a/drm/nouveau/nvkm/subdev/therm/base.c 
> b/drm/nouveau/nvkm/subdev/therm/base.c
> index f27fc6d0..8fa691fb 100644
> --- a/drm/nouveau/nvkm/subdev/therm/base.c
> +++ b/drm/nouveau/nvkm/subdev/therm/base.c
> @@ -24,22 +24,26 @@
>  #include "priv.h"
>  
>  int
> 

[Nouveau] [RFC PATCH 01/29] therm: split return code and value in nvkm_get_temp

2017-09-15 Thread Karol Herbst
The current hwmon code doesn't check if the returned value was actually an
error.

Since Kepler temperature sensors are able to report negative values.
Since Pascal (and maybe earlier) we have sensors with improved precision.

Adjust the nvkm_get_temp method to be able to deal with those changes
and let hwmon return an error properly.

Signed-off-by: Karol Herbst 
---
 drm/nouveau/include/nvkm/subdev/therm.h |  2 +-
 drm/nouveau/nouveau_hwmon.c | 15 +--
 drm/nouveau/nvkm/subdev/therm/base.c| 19 ++-
 drm/nouveau/nvkm/subdev/therm/g84.c | 11 ++-
 drm/nouveau/nvkm/subdev/therm/gp100.c   |  9 +
 drm/nouveau/nvkm/subdev/therm/nv40.c|  9 +++--
 drm/nouveau/nvkm/subdev/therm/nv50.c|  9 +++--
 drm/nouveau/nvkm/subdev/therm/priv.h|  4 ++--
 drm/nouveau/nvkm/subdev/therm/temp.c| 16 
 9 files changed, 55 insertions(+), 39 deletions(-)

diff --git a/drm/nouveau/include/nvkm/subdev/therm.h 
b/drm/nouveau/include/nvkm/subdev/therm.h
index 9841f076..8c84017f 100644
--- a/drm/nouveau/include/nvkm/subdev/therm.h
+++ b/drm/nouveau/include/nvkm/subdev/therm.h
@@ -86,7 +86,7 @@ struct nvkm_therm {
int (*attr_set)(struct nvkm_therm *, enum nvkm_therm_attr_type, int);
 };
 
-int nvkm_therm_temp_get(struct nvkm_therm *);
+int nvkm_therm_temp_get(struct nvkm_therm *, int *);
 int nvkm_therm_fan_sense(struct nvkm_therm *);
 int nvkm_therm_cstate(struct nvkm_therm *, int, int);
 
diff --git a/drm/nouveau/nouveau_hwmon.c b/drm/nouveau/nouveau_hwmon.c
index 7c965648..f1914d9a 100644
--- a/drm/nouveau/nouveau_hwmon.c
+++ b/drm/nouveau/nouveau_hwmon.c
@@ -326,8 +326,9 @@ nouveau_temp_is_visible(const void *data, u32 attr, int 
channel)
 {
struct nouveau_drm *drm = nouveau_drm((struct drm_device *)data);
struct nvkm_therm *therm = nvxx_therm(>client.device);
+   int val;
 
-   if (therm && therm->attr_get && nvkm_therm_temp_get(therm) < 0)
+   if (therm && therm->attr_get && nvkm_therm_temp_get(therm, ) < 0)
return 0;
 
switch (attr) {
@@ -421,15 +422,16 @@ nouveau_temp_read(struct device *dev, u32 attr, int 
channel, long *val)
struct drm_device *drm_dev = dev_get_drvdata(dev);
struct nouveau_drm *drm = nouveau_drm(drm_dev);
struct nvkm_therm *therm = nvxx_therm(>client.device);
-   int ret;
+   int ret = 0;
+   int temp;
 
if (!therm || !therm->attr_get)
return -EOPNOTSUPP;
 
switch (attr) {
case hwmon_temp_input:
-   ret = nvkm_therm_temp_get(therm);
-   *val = ret < 0 ? ret : (ret * 1000);
+   ret = nvkm_therm_temp_get(therm, );
+   *val = temp * 1000;
break;
case hwmon_temp_max:
*val = therm->attr_get(therm, NVKM_THERM_ATTR_THRS_DOWN_CLK)
@@ -459,7 +461,7 @@ nouveau_temp_read(struct device *dev, u32 attr, int 
channel, long *val)
return -EOPNOTSUPP;
}
 
-   return 0;
+   return ret;
 }
 
 static int
@@ -713,6 +715,7 @@ nouveau_hwmon_init(struct drm_device *dev)
struct device *hwmon_dev;
int ret = 0;
int i = 0;
+   int val;
 
hwmon = drm->hwmon = kzalloc(sizeof(*hwmon), GFP_KERNEL);
if (!hwmon)
@@ -720,7 +723,7 @@ nouveau_hwmon_init(struct drm_device *dev)
hwmon->dev = dev;
 
if (therm && therm->attr_get && therm->attr_set) {
-   if (nvkm_therm_temp_get(therm) >= 0)
+   if (nvkm_therm_temp_get(therm, ) >= 0)
special_groups[i++] = _auto_point_sensor_group;
if (therm->fan_get && therm->fan_get(therm) >= 0)
special_groups[i++] = _fan_sensor_group;
diff --git a/drm/nouveau/nvkm/subdev/therm/base.c 
b/drm/nouveau/nvkm/subdev/therm/base.c
index f27fc6d0..8fa691fb 100644
--- a/drm/nouveau/nvkm/subdev/therm/base.c
+++ b/drm/nouveau/nvkm/subdev/therm/base.c
@@ -24,22 +24,26 @@
 #include "priv.h"
 
 int
-nvkm_therm_temp_get(struct nvkm_therm *therm)
+nvkm_therm_temp_get(struct nvkm_therm *therm, int *val)
 {
if (therm->func->temp_get)
-   return therm->func->temp_get(therm);
+   return therm->func->temp_get(therm, val);
return -ENODEV;
 }
 
 static int
 nvkm_therm_update_trip(struct nvkm_therm *therm)
 {
+   int temp, ret;
struct nvbios_therm_trip_point *trip = therm->fan->bios.trip,
   *cur_trip = NULL,
   *last_trip = therm->last_trip;
-   u8  temp = therm->func->temp_get(therm);
u16 duty, i;
 
+   ret = therm->func->temp_get(therm, );
+   if (ret < 0)
+   return ret;
+
/* look for the trip point corresponding to the current temperature */
cur_trip = NULL;
for (i = 0; i < therm->fan->bios.nr_fan_trip; i++) {
@@ -67,9 +71,13 @@ static int