The current hwmon code doesn't check if the returned value was actually an
error.

Since Kepler temperature sensors are able to report negative values. Those
negative values are not for error reporting, but rather when you buried
your GPU in snow somewhere in Antarctica and still want a valid
temperature to be reported (unverified).

Since Pascal (and maybe earlier) we have sensors with improved precision.

Adjust the nvkm_get_temp method to be able to deal with those changes
and let hwmon return an error properly.

Signed-off-by: Karol Herbst <karolher...@gmail.com>
---
 drm/nouveau/include/nvkm/subdev/clk.h   |  4 ++--
 drm/nouveau/include/nvkm/subdev/therm.h |  2 +-
 drm/nouveau/nouveau_hwmon.c             | 15 +++++++++------
 drm/nouveau/nvkm/subdev/clk/base.c      |  2 +-
 drm/nouveau/nvkm/subdev/therm/base.c    | 19 ++++++++++++++-----
 drm/nouveau/nvkm/subdev/therm/g84.c     | 13 ++++++++-----
 drm/nouveau/nvkm/subdev/therm/gp100.c   |  9 +++++----
 drm/nouveau/nvkm/subdev/therm/nv40.c    |  9 +++------
 drm/nouveau/nvkm/subdev/therm/nv50.c    |  9 +++------
 drm/nouveau/nvkm/subdev/therm/priv.h    |  4 ++--
 drm/nouveau/nvkm/subdev/therm/temp.c    | 16 ++++++++++++----
 11 files changed, 60 insertions(+), 42 deletions(-)

diff --git a/drm/nouveau/include/nvkm/subdev/clk.h 
b/drm/nouveau/include/nvkm/subdev/clk.h
index e5275f74..506f8cc6 100644
--- a/drm/nouveau/include/nvkm/subdev/clk.h
+++ b/drm/nouveau/include/nvkm/subdev/clk.h
@@ -100,7 +100,7 @@ struct nvkm_clk {
        int ustate_dc; /* user-requested (-1 disabled, -2 perfmon) */
        int astate; /* perfmon adjustment (base) */
        int dstate; /* display adjustment (min+) */
-       u8  temp;
+       int temp;
 
        bool allow_reclock;
 #define NVKM_CLK_BOOST_NONE 0x0
@@ -122,7 +122,7 @@ int nvkm_clk_read(struct nvkm_clk *, enum nv_clk_src);
 int nvkm_clk_ustate(struct nvkm_clk *, int req, int pwr);
 int nvkm_clk_astate(struct nvkm_clk *, int req, int rel, bool wait);
 int nvkm_clk_dstate(struct nvkm_clk *, int req, int rel);
-int nvkm_clk_tstate(struct nvkm_clk *, u8 temperature);
+int nvkm_clk_tstate(struct nvkm_clk *, int temperature);
 
 int nv04_clk_new(struct nvkm_device *, int, struct nvkm_clk **);
 int nv40_clk_new(struct nvkm_device *, int, struct nvkm_clk **);
diff --git a/drm/nouveau/include/nvkm/subdev/therm.h 
b/drm/nouveau/include/nvkm/subdev/therm.h
index 9841f076..8c84017f 100644
--- a/drm/nouveau/include/nvkm/subdev/therm.h
+++ b/drm/nouveau/include/nvkm/subdev/therm.h
@@ -86,7 +86,7 @@ struct nvkm_therm {
        int (*attr_set)(struct nvkm_therm *, enum nvkm_therm_attr_type, int);
 };
 
-int nvkm_therm_temp_get(struct nvkm_therm *);
+int nvkm_therm_temp_get(struct nvkm_therm *, int *);
 int nvkm_therm_fan_sense(struct nvkm_therm *);
 int nvkm_therm_cstate(struct nvkm_therm *, int, int);
 
diff --git a/drm/nouveau/nouveau_hwmon.c b/drm/nouveau/nouveau_hwmon.c
index 7c965648..7486e4af 100644
--- a/drm/nouveau/nouveau_hwmon.c
+++ b/drm/nouveau/nouveau_hwmon.c
@@ -326,8 +326,9 @@ nouveau_temp_is_visible(const void *data, u32 attr, int 
channel)
 {
        struct nouveau_drm *drm = nouveau_drm((struct drm_device *)data);
        struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
+       int val;
 
-       if (therm && therm->attr_get && nvkm_therm_temp_get(therm) < 0)
+       if (therm && therm->attr_get && nvkm_therm_temp_get(therm, &val))
                return 0;
 
        switch (attr) {
@@ -421,15 +422,16 @@ nouveau_temp_read(struct device *dev, u32 attr, int 
channel, long *val)
        struct drm_device *drm_dev = dev_get_drvdata(dev);
        struct nouveau_drm *drm = nouveau_drm(drm_dev);
        struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
-       int ret;
+       int ret = 0;
+       int temp;
 
        if (!therm || !therm->attr_get)
                return -EOPNOTSUPP;
 
        switch (attr) {
        case hwmon_temp_input:
-               ret = nvkm_therm_temp_get(therm);
-               *val = ret < 0 ? ret : (ret * 1000);
+               ret = nvkm_therm_temp_get(therm, &temp);
+               *val = temp * 1000;
                break;
        case hwmon_temp_max:
                *val = therm->attr_get(therm, NVKM_THERM_ATTR_THRS_DOWN_CLK)
@@ -459,7 +461,7 @@ nouveau_temp_read(struct device *dev, u32 attr, int 
channel, long *val)
                return -EOPNOTSUPP;
        }
 
-       return 0;
+       return ret;
 }
 
 static int
@@ -713,6 +715,7 @@ nouveau_hwmon_init(struct drm_device *dev)
        struct device *hwmon_dev;
        int ret = 0;
        int i = 0;
+       int val;
 
        hwmon = drm->hwmon = kzalloc(sizeof(*hwmon), GFP_KERNEL);
        if (!hwmon)
@@ -720,7 +723,7 @@ nouveau_hwmon_init(struct drm_device *dev)
        hwmon->dev = dev;
 
        if (therm && therm->attr_get && therm->attr_set) {
-               if (nvkm_therm_temp_get(therm) >= 0)
+               if (!nvkm_therm_temp_get(therm, &val))
                        special_groups[i++] = &temp1_auto_point_sensor_group;
                if (therm->fan_get && therm->fan_get(therm) >= 0)
                        special_groups[i++] = &pwm_fan_sensor_group;
diff --git a/drm/nouveau/nvkm/subdev/clk/base.c 
b/drm/nouveau/nvkm/subdev/clk/base.c
index e4c8d310..0b28dbb9 100644
--- a/drm/nouveau/nvkm/subdev/clk/base.c
+++ b/drm/nouveau/nvkm/subdev/clk/base.c
@@ -540,7 +540,7 @@ nvkm_clk_astate(struct nvkm_clk *clk, int req, int rel, 
bool wait)
 }
 
 int
-nvkm_clk_tstate(struct nvkm_clk *clk, u8 temp)
+nvkm_clk_tstate(struct nvkm_clk *clk, int temp)
 {
        if (clk->temp == temp)
                return 0;
diff --git a/drm/nouveau/nvkm/subdev/therm/base.c 
b/drm/nouveau/nvkm/subdev/therm/base.c
index f27fc6d0..8e5f6f7f 100644
--- a/drm/nouveau/nvkm/subdev/therm/base.c
+++ b/drm/nouveau/nvkm/subdev/therm/base.c
@@ -24,22 +24,26 @@
 #include "priv.h"
 
 int
-nvkm_therm_temp_get(struct nvkm_therm *therm)
+nvkm_therm_temp_get(struct nvkm_therm *therm, int *val)
 {
        if (therm->func->temp_get)
-               return therm->func->temp_get(therm);
+               return therm->func->temp_get(therm, val);
        return -ENODEV;
 }
 
 static int
 nvkm_therm_update_trip(struct nvkm_therm *therm)
 {
+       int temp, ret;
        struct nvbios_therm_trip_point *trip = therm->fan->bios.trip,
                                       *cur_trip = NULL,
                                       *last_trip = therm->last_trip;
-       u8  temp = therm->func->temp_get(therm);
        u16 duty, i;
 
+       ret = therm->func->temp_get(therm, &temp);
+       if (ret < 0)
+               return ret;
+
        /* look for the trip point corresponding to the current temperature */
        cur_trip = NULL;
        for (i = 0; i < therm->fan->bios.nr_fan_trip; i++) {
@@ -67,9 +71,13 @@ static int
 nvkm_therm_compute_linear_duty(struct nvkm_therm *therm, u8 linear_min_temp,
                                u8 linear_max_temp)
 {
-       u8  temp = therm->func->temp_get(therm);
+       int temp, ret;
        u16 duty;
 
+       ret = therm->func->temp_get(therm, &temp);
+       if (ret < 0)
+               return ret;
+
        /* handle the non-linear part first */
        if (temp < linear_min_temp)
                return therm->fan->bios.min_duty;
@@ -182,6 +190,7 @@ nvkm_therm_fan_mode(struct nvkm_therm *therm, int mode)
 {
        struct nvkm_subdev *subdev = &therm->subdev;
        struct nvkm_device *device = subdev->device;
+       int val;
        static const char *name[] = {
                "disabled",
                "manual",
@@ -197,7 +206,7 @@ nvkm_therm_fan_mode(struct nvkm_therm *therm, int mode)
        /* do not allow automatic fan management if the thermal sensor is
         * not available */
        if (mode == NVKM_THERM_CTRL_AUTO &&
-           therm->func->temp_get(therm) < 0)
+           therm->func->temp_get(therm, &val))
                return -EINVAL;
 
        if (therm->mode == mode)
diff --git a/drm/nouveau/nvkm/subdev/therm/g84.c 
b/drm/nouveau/nvkm/subdev/therm/g84.c
index 96f8da40..81c0bda8 100644
--- a/drm/nouveau/nvkm/subdev/therm/g84.c
+++ b/drm/nouveau/nvkm/subdev/therm/g84.c
@@ -27,14 +27,15 @@
 #include <subdev/fuse.h>
 
 int
-g84_temp_get(struct nvkm_therm *therm)
+g84_temp_get(struct nvkm_therm *therm, int *val)
 {
        struct nvkm_device *device = therm->subdev.device;
 
-       if (nvkm_fuse_read(device->fuse, 0x1a8) == 1)
-               return nvkm_rd32(device, 0x20400);
-       else
+       if (nvkm_fuse_read(device->fuse, 0x1a8) != 1)
                return -ENODEV;
+
+       *val = nvkm_rd32(device, 0x20400);
+       return 0;
 }
 
 void
@@ -114,8 +115,10 @@ g84_therm_threshold_hyst_emulation(struct nvkm_therm 
*therm,
                new_state = NVKM_THERM_THRS_LOWER;
        }
 
+       if (therm->func->temp_get(therm, &cur))
+               return;
+
        /* fix the state (in case someone reprogrammed the alarms) */
-       cur = therm->func->temp_get(therm);
        if (new_state == NVKM_THERM_THRS_LOWER && cur > thrs->temp)
                new_state = NVKM_THERM_THRS_HIGHER;
        else if (new_state == NVKM_THERM_THRS_HIGHER &&
diff --git a/drm/nouveau/nvkm/subdev/therm/gp100.c 
b/drm/nouveau/nvkm/subdev/therm/gp100.c
index 9f0dea3f..d8206748 100644
--- a/drm/nouveau/nvkm/subdev/therm/gp100.c
+++ b/drm/nouveau/nvkm/subdev/therm/gp100.c
@@ -24,7 +24,7 @@
 #include "priv.h"
 
 static int
-gp100_temp_get(struct nvkm_therm *therm)
+gp100_temp_get(struct nvkm_therm *therm, int *val)
 {
        struct nvkm_device *device = therm->subdev.device;
        struct nvkm_subdev *subdev = &therm->subdev;
@@ -36,9 +36,10 @@ gp100_temp_get(struct nvkm_therm *therm)
                nvkm_trace(subdev, "reading temperature from SHADOWed 
sensor\n");
 
        /* device valid */
-       if (tsensor & 0x20000000)
-               return (inttemp >> 8);
-       else
+       if (tsensor & 0x20000000) {
+               *val = inttemp >> 8;
+               return 0;
+       } else
                return -ENODEV;
 }
 
diff --git a/drm/nouveau/nvkm/subdev/therm/nv40.c 
b/drm/nouveau/nvkm/subdev/therm/nv40.c
index 2c92ffb5..cfd5b215 100644
--- a/drm/nouveau/nvkm/subdev/therm/nv40.c
+++ b/drm/nouveau/nvkm/subdev/therm/nv40.c
@@ -70,7 +70,7 @@ nv40_sensor_setup(struct nvkm_therm *therm)
 }
 
 static int
-nv40_temp_get(struct nvkm_therm *therm)
+nv40_temp_get(struct nvkm_therm *therm, int *val)
 {
        struct nvkm_device *device = therm->subdev.device;
        struct nvbios_therm_sensor *sensor = &therm->bios_sensor;
@@ -95,11 +95,8 @@ nv40_temp_get(struct nvkm_therm *therm)
        core_temp = core_temp + sensor->offset_num / sensor->offset_den;
        core_temp = core_temp + sensor->offset_constant - 8;
 
-       /* reserve negative temperatures for errors */
-       if (core_temp < 0)
-               core_temp = 0;
-
-       return core_temp;
+       *val = core_temp;
+       return 0;
 }
 
 static int
diff --git a/drm/nouveau/nvkm/subdev/therm/nv50.c 
b/drm/nouveau/nvkm/subdev/therm/nv50.c
index 9b57b433..62ec4063 100644
--- a/drm/nouveau/nvkm/subdev/therm/nv50.c
+++ b/drm/nouveau/nvkm/subdev/therm/nv50.c
@@ -126,7 +126,7 @@ nv50_sensor_setup(struct nvkm_therm *therm)
 }
 
 static int
-nv50_temp_get(struct nvkm_therm *therm)
+nv50_temp_get(struct nvkm_therm *therm, int *val)
 {
        struct nvkm_device *device = therm->subdev.device;
        struct nvbios_therm_sensor *sensor = &therm->bios_sensor;
@@ -143,11 +143,8 @@ nv50_temp_get(struct nvkm_therm *therm)
        core_temp = core_temp + sensor->offset_num / sensor->offset_den;
        core_temp = core_temp + sensor->offset_constant - 8;
 
-       /* reserve negative temperatures for errors */
-       if (core_temp < 0)
-               core_temp = 0;
-
-       return core_temp;
+       *val = core_temp;
+       return 0;
 }
 
 static void
diff --git a/drm/nouveau/nvkm/subdev/therm/priv.h 
b/drm/nouveau/nvkm/subdev/therm/priv.h
index 1f46e371..b325ec5f 100644
--- a/drm/nouveau/nvkm/subdev/therm/priv.h
+++ b/drm/nouveau/nvkm/subdev/therm/priv.h
@@ -91,7 +91,7 @@ struct nvkm_therm_func {
        int (*pwm_set)(struct nvkm_therm *, int line, u32, u32);
        int (*pwm_clock)(struct nvkm_therm *, int line);
 
-       int (*temp_get)(struct nvkm_therm *);
+       int (*temp_get)(struct nvkm_therm *, int *);
 
        int (*fan_sense)(struct nvkm_therm *);
 
@@ -105,7 +105,7 @@ int  nv50_fan_pwm_get(struct nvkm_therm *, int, u32 *, u32 
*);
 int  nv50_fan_pwm_set(struct nvkm_therm *, int, u32, u32);
 int  nv50_fan_pwm_clock(struct nvkm_therm *, int);
 
-int  g84_temp_get(struct nvkm_therm *);
+int  g84_temp_get(struct nvkm_therm *, int *);
 void g84_sensor_setup(struct nvkm_therm *);
 void g84_therm_fini(struct nvkm_therm *);
 
diff --git a/drm/nouveau/nvkm/subdev/therm/temp.c 
b/drm/nouveau/nvkm/subdev/therm/temp.c
index ddb2b2c6..e7b8cbe2 100644
--- a/drm/nouveau/nvkm/subdev/therm/temp.c
+++ b/drm/nouveau/nvkm/subdev/therm/temp.c
@@ -86,7 +86,10 @@ nvkm_therm_sensor_event(struct nvkm_therm *therm, enum 
nvkm_therm_thrs thrs,
        static const char * const thresholds[] = {
                "fanboost", "downclock", "critical", "shutdown"
        };
-       int temperature = therm->func->temp_get(therm);
+       int temperature;
+
+       if (therm->func->temp_get(therm, &temperature))
+               return;
 
        if (thrs < 0 || thrs > 3)
                return;
@@ -140,7 +143,10 @@ nvkm_therm_threshold_hyst_polling(struct nvkm_therm *therm,
 {
        enum nvkm_therm_thrs_direction direction;
        enum nvkm_therm_thrs_state prev_state, new_state;
-       int temp = therm->func->temp_get(therm);
+       int temp;
+
+       if (therm->func->temp_get(therm, &temp))
+               return;
 
        prev_state = nvkm_therm_sensor_get_threshold_state(therm, thrs_name);
 
@@ -166,6 +172,7 @@ alarm_timer_callback(struct nvkm_alarm *alarm)
        struct nvbios_therm_sensor *sensor = &therm->bios_sensor;
        struct nvkm_timer *tmr = therm->subdev.device->timer;
        unsigned long flags;
+       int val;
 
        spin_lock_irqsave(&therm->sensor.alarm_program_lock, flags);
 
@@ -185,7 +192,7 @@ alarm_timer_callback(struct nvkm_alarm *alarm)
        spin_unlock_irqrestore(&therm->sensor.alarm_program_lock, flags);
 
        /* schedule the next poll in one second */
-       if (therm->func->temp_get(therm) >= 0)
+       if (!therm->func->temp_get(therm, &val))
                nvkm_timer_alarm(tmr, 1000000000ULL, alarm);
 }
 
@@ -227,9 +234,10 @@ nvkm_therm_sensor_fini(struct nvkm_therm *therm, bool 
suspend)
 void
 nvkm_therm_sensor_preinit(struct nvkm_therm *therm)
 {
+       int val;
        const char *sensor_avail = "yes";
 
-       if (therm->func->temp_get(therm) < 0)
+       if (therm->func->temp_get(therm, &val))
                sensor_avail = "no";
 
        nvkm_debug(&therm->subdev, "internal sensor: %s\n", sensor_avail);
-- 
2.15.0

_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/nouveau

Reply via email to