As you changed the return value of `temp_get()` to solely be the error code, or absence of an error, I would change all those tests that checked whether the returned value was strictly less, or greater than, 0 to now only compare against 0 (no error). For example,
if (therm && therm->attr_get && nvkm_therm_temp_get(therm, &val) < 0) if (therm->func->temp_get(therm, &val) >= 0) could become if (therm && therm->attr_get && nvkm_therm_temp_get(therm, &val)) if (!therm->func->temp_get(therm, &val)) to match other error checking code. More comments below On 2017-09-15 — 17:11, Karol Herbst wrote: > The current hwmon code doesn't check if the returned value was actually an > error. > > Since Kepler temperature sensors are able to report negative values. I assume those negative values are not for error reporting, but more if you buried your GPU in the snow somewhere in the Antarctic and still want a valid temperature to be reported. > Since Pascal (and maybe earlier) we have sensors with improved precision. > > Adjust the nvkm_get_temp method to be able to deal with those changes > and let hwmon return an error properly. > > Signed-off-by: Karol Herbst <karolher...@gmail.com> > --- > drm/nouveau/include/nvkm/subdev/therm.h | 2 +- > drm/nouveau/nouveau_hwmon.c | 15 +++++++++------ > drm/nouveau/nvkm/subdev/therm/base.c | 19 ++++++++++++++----- > drm/nouveau/nvkm/subdev/therm/g84.c | 11 ++++++----- > drm/nouveau/nvkm/subdev/therm/gp100.c | 9 +++++---- > drm/nouveau/nvkm/subdev/therm/nv40.c | 9 +++------ > drm/nouveau/nvkm/subdev/therm/nv50.c | 9 +++------ > drm/nouveau/nvkm/subdev/therm/priv.h | 4 ++-- > drm/nouveau/nvkm/subdev/therm/temp.c | 16 ++++++++++++---- > 9 files changed, 55 insertions(+), 39 deletions(-) > > diff --git a/drm/nouveau/include/nvkm/subdev/therm.h > b/drm/nouveau/include/nvkm/subdev/therm.h > index 9841f076..8c84017f 100644 > --- a/drm/nouveau/include/nvkm/subdev/therm.h > +++ b/drm/nouveau/include/nvkm/subdev/therm.h > @@ -86,7 +86,7 @@ struct nvkm_therm { > int (*attr_set)(struct nvkm_therm *, enum nvkm_therm_attr_type, int); > }; > > -int nvkm_therm_temp_get(struct nvkm_therm *); > +int nvkm_therm_temp_get(struct nvkm_therm *, int *); > int nvkm_therm_fan_sense(struct nvkm_therm *); > int nvkm_therm_cstate(struct nvkm_therm *, int, int); > > diff --git a/drm/nouveau/nouveau_hwmon.c b/drm/nouveau/nouveau_hwmon.c > index 7c965648..f1914d9a 100644 > --- a/drm/nouveau/nouveau_hwmon.c > +++ b/drm/nouveau/nouveau_hwmon.c > @@ -326,8 +326,9 @@ nouveau_temp_is_visible(const void *data, u32 attr, int > channel) > { > struct nouveau_drm *drm = nouveau_drm((struct drm_device *)data); > struct nvkm_therm *therm = nvxx_therm(&drm->client.device); > + int val; > > - if (therm && therm->attr_get && nvkm_therm_temp_get(therm) < 0) > + if (therm && therm->attr_get && nvkm_therm_temp_get(therm, &val) < 0) > return 0; > > switch (attr) { > @@ -421,15 +422,16 @@ nouveau_temp_read(struct device *dev, u32 attr, int > channel, long *val) > struct drm_device *drm_dev = dev_get_drvdata(dev); > struct nouveau_drm *drm = nouveau_drm(drm_dev); > struct nvkm_therm *therm = nvxx_therm(&drm->client.device); > - int ret; > + int ret = 0; > + int temp; > > if (!therm || !therm->attr_get) > return -EOPNOTSUPP; > > switch (attr) { > case hwmon_temp_input: > - ret = nvkm_therm_temp_get(therm); > - *val = ret < 0 ? ret : (ret * 1000); > + ret = nvkm_therm_temp_get(therm, &temp); > + *val = temp * 1000; > break; > case hwmon_temp_max: > *val = therm->attr_get(therm, NVKM_THERM_ATTR_THRS_DOWN_CLK) > @@ -459,7 +461,7 @@ nouveau_temp_read(struct device *dev, u32 attr, int > channel, long *val) > return -EOPNOTSUPP; > } > > - return 0; > + return ret; > } > > static int > @@ -713,6 +715,7 @@ nouveau_hwmon_init(struct drm_device *dev) > struct device *hwmon_dev; > int ret = 0; > int i = 0; > + int val; > > hwmon = drm->hwmon = kzalloc(sizeof(*hwmon), GFP_KERNEL); > if (!hwmon) > @@ -720,7 +723,7 @@ nouveau_hwmon_init(struct drm_device *dev) > hwmon->dev = dev; > > if (therm && therm->attr_get && therm->attr_set) { > - if (nvkm_therm_temp_get(therm) >= 0) > + if (nvkm_therm_temp_get(therm, &val) >= 0) > special_groups[i++] = &temp1_auto_point_sensor_group; > if (therm->fan_get && therm->fan_get(therm) >= 0) > special_groups[i++] = &pwm_fan_sensor_group; > diff --git a/drm/nouveau/nvkm/subdev/therm/base.c > b/drm/nouveau/nvkm/subdev/therm/base.c > index f27fc6d0..8fa691fb 100644 > --- a/drm/nouveau/nvkm/subdev/therm/base.c > +++ b/drm/nouveau/nvkm/subdev/therm/base.c > @@ -24,22 +24,26 @@ > #include "priv.h" > > int > -nvkm_therm_temp_get(struct nvkm_therm *therm) > +nvkm_therm_temp_get(struct nvkm_therm *therm, int *val) > { > if (therm->func->temp_get) > - return therm->func->temp_get(therm); > + return therm->func->temp_get(therm, val); > return -ENODEV; > } > > static int > nvkm_therm_update_trip(struct nvkm_therm *therm) > { > + int temp, ret; > struct nvbios_therm_trip_point *trip = therm->fan->bios.trip, > *cur_trip = NULL, > *last_trip = therm->last_trip; > - u8 temp = therm->func->temp_get(therm); > u16 duty, i; > > + ret = therm->func->temp_get(therm, &temp); > + if (ret < 0) > + return ret; > + > /* look for the trip point corresponding to the current temperature */ > cur_trip = NULL; > for (i = 0; i < therm->fan->bios.nr_fan_trip; i++) { > @@ -67,9 +71,13 @@ static int > nvkm_therm_compute_linear_duty(struct nvkm_therm *therm, u8 linear_min_temp, > u8 linear_max_temp) > { > - u8 temp = therm->func->temp_get(therm); > + int temp, ret; > u16 duty; > > + ret = therm->func->temp_get(therm, &temp); > + if (ret < 0) > + return ret; > + > /* handle the non-linear part first */ > if (temp < linear_min_temp) > return therm->fan->bios.min_duty; > @@ -182,6 +190,7 @@ nvkm_therm_fan_mode(struct nvkm_therm *therm, int mode) > { > struct nvkm_subdev *subdev = &therm->subdev; > struct nvkm_device *device = subdev->device; > + int val; > static const char *name[] = { > "disabled", > "manual", > @@ -197,7 +206,7 @@ nvkm_therm_fan_mode(struct nvkm_therm *therm, int mode) > /* do not allow automatic fan management if the thermal sensor is > * not available */ > if (mode == NVKM_THERM_CTRL_AUTO && > - therm->func->temp_get(therm) < 0) > + therm->func->temp_get(therm, &val) < 0) > return -EINVAL; > > if (therm->mode == mode) > diff --git a/drm/nouveau/nvkm/subdev/therm/g84.c > b/drm/nouveau/nvkm/subdev/therm/g84.c > index 96f8da40..3bb2d829 100644 > --- a/drm/nouveau/nvkm/subdev/therm/g84.c > +++ b/drm/nouveau/nvkm/subdev/therm/g84.c > @@ -27,14 +27,15 @@ > #include <subdev/fuse.h> > > int > -g84_temp_get(struct nvkm_therm *therm) > +g84_temp_get(struct nvkm_therm *therm, int *val) > { > struct nvkm_device *device = therm->subdev.device; > > - if (nvkm_fuse_read(device->fuse, 0x1a8) == 1) > - return nvkm_rd32(device, 0x20400); > - else > + if (nvkm_fuse_read(device->fuse, 0x1a8) != 1) > return -ENODEV; > + > + *val = nvkm_rd32(device, 0x20400); You could allow passing NULL values for `val` for when you are not interested by the results, as you did in some places further up. I would probably check for NULL values for `val`, and for `therm` though it seems to be an implicit rule that the caller should be the one doing the check. > + return 0; > } > > void > @@ -115,7 +116,7 @@ g84_therm_threshold_hyst_emulation(struct nvkm_therm > *therm, > } > > /* fix the state (in case someone reprogrammed the alarms) */ > - cur = therm->func->temp_get(therm); > + therm->func->temp_get(therm, &cur); Should there be a check for the return value of the `temp_get()` function here, and error out if an error code was returned? In any case, you are the behaviour here, as if `temp_get()` previously returned an error code, the if-statement would always be false, whereas now, it’s undecided as `cur` will not even be initialised in that case! I am not sure whether it should be the caller of `temp_get()` or `temp_get()` itself which should initialise a default value for the `val` argument (probably the latter). > if (new_state == NVKM_THERM_THRS_LOWER && cur > thrs->temp) > new_state = NVKM_THERM_THRS_HIGHER; > else if (new_state == NVKM_THERM_THRS_HIGHER && > diff --git a/drm/nouveau/nvkm/subdev/therm/gp100.c > b/drm/nouveau/nvkm/subdev/therm/gp100.c > index 9f0dea3f..d8206748 100644 > --- a/drm/nouveau/nvkm/subdev/therm/gp100.c > +++ b/drm/nouveau/nvkm/subdev/therm/gp100.c > @@ -24,7 +24,7 @@ > #include "priv.h" > > static int > -gp100_temp_get(struct nvkm_therm *therm) > +gp100_temp_get(struct nvkm_therm *therm, int *val) > { > struct nvkm_device *device = therm->subdev.device; > struct nvkm_subdev *subdev = &therm->subdev; > @@ -36,9 +36,10 @@ gp100_temp_get(struct nvkm_therm *therm) > nvkm_trace(subdev, "reading temperature from SHADOWed > sensor\n"); > > /* device valid */ > - if (tsensor & 0x20000000) > - return (inttemp >> 8); > - else > + if (tsensor & 0x20000000) { > + *val = inttemp >> 8; > + return 0; > + } else > return -ENODEV; > } > > diff --git a/drm/nouveau/nvkm/subdev/therm/nv40.c > b/drm/nouveau/nvkm/subdev/therm/nv40.c > index 2c92ffb5..cfd5b215 100644 > --- a/drm/nouveau/nvkm/subdev/therm/nv40.c > +++ b/drm/nouveau/nvkm/subdev/therm/nv40.c > @@ -70,7 +70,7 @@ nv40_sensor_setup(struct nvkm_therm *therm) > } > > static int > -nv40_temp_get(struct nvkm_therm *therm) > +nv40_temp_get(struct nvkm_therm *therm, int *val) > { > struct nvkm_device *device = therm->subdev.device; > struct nvbios_therm_sensor *sensor = &therm->bios_sensor; > @@ -95,11 +95,8 @@ nv40_temp_get(struct nvkm_therm *therm) > core_temp = core_temp + sensor->offset_num / sensor->offset_den; > core_temp = core_temp + sensor->offset_constant - 8; > > - /* reserve negative temperatures for errors */ > - if (core_temp < 0) > - core_temp = 0; > - > - return core_temp; > + *val = core_temp; > + return 0; > } > > static int > diff --git a/drm/nouveau/nvkm/subdev/therm/nv50.c > b/drm/nouveau/nvkm/subdev/therm/nv50.c > index 9b57b433..62ec4063 100644 > --- a/drm/nouveau/nvkm/subdev/therm/nv50.c > +++ b/drm/nouveau/nvkm/subdev/therm/nv50.c > @@ -126,7 +126,7 @@ nv50_sensor_setup(struct nvkm_therm *therm) > } > > static int > -nv50_temp_get(struct nvkm_therm *therm) > +nv50_temp_get(struct nvkm_therm *therm, int *val) > { > struct nvkm_device *device = therm->subdev.device; > struct nvbios_therm_sensor *sensor = &therm->bios_sensor; > @@ -143,11 +143,8 @@ nv50_temp_get(struct nvkm_therm *therm) > core_temp = core_temp + sensor->offset_num / sensor->offset_den; > core_temp = core_temp + sensor->offset_constant - 8; > > - /* reserve negative temperatures for errors */ > - if (core_temp < 0) > - core_temp = 0; > - > - return core_temp; > + *val = core_temp; > + return 0; > } > > static void > diff --git a/drm/nouveau/nvkm/subdev/therm/priv.h > b/drm/nouveau/nvkm/subdev/therm/priv.h > index 1f46e371..b325ec5f 100644 > --- a/drm/nouveau/nvkm/subdev/therm/priv.h > +++ b/drm/nouveau/nvkm/subdev/therm/priv.h > @@ -91,7 +91,7 @@ struct nvkm_therm_func { > int (*pwm_set)(struct nvkm_therm *, int line, u32, u32); > int (*pwm_clock)(struct nvkm_therm *, int line); > > - int (*temp_get)(struct nvkm_therm *); > + int (*temp_get)(struct nvkm_therm *, int *); > > int (*fan_sense)(struct nvkm_therm *); > > @@ -105,7 +105,7 @@ int nv50_fan_pwm_get(struct nvkm_therm *, int, u32 *, > u32 *); > int nv50_fan_pwm_set(struct nvkm_therm *, int, u32, u32); > int nv50_fan_pwm_clock(struct nvkm_therm *, int); > > -int g84_temp_get(struct nvkm_therm *); > +int g84_temp_get(struct nvkm_therm *, int *); > void g84_sensor_setup(struct nvkm_therm *); > void g84_therm_fini(struct nvkm_therm *); > > diff --git a/drm/nouveau/nvkm/subdev/therm/temp.c > b/drm/nouveau/nvkm/subdev/therm/temp.c > index ddb2b2c6..5ec2dfb3 100644 > --- a/drm/nouveau/nvkm/subdev/therm/temp.c > +++ b/drm/nouveau/nvkm/subdev/therm/temp.c > @@ -86,7 +86,10 @@ nvkm_therm_sensor_event(struct nvkm_therm *therm, enum > nvkm_therm_thrs thrs, > static const char * const thresholds[] = { > "fanboost", "downclock", "critical", "shutdown" > }; > - int temperature = therm->func->temp_get(therm); > + int temperature; > + > + if (therm->func->temp_get(therm, &temperature) < 0) > + return; > > if (thrs < 0 || thrs > 3) > return; > @@ -140,7 +143,10 @@ nvkm_therm_threshold_hyst_polling(struct nvkm_therm > *therm, > { > enum nvkm_therm_thrs_direction direction; > enum nvkm_therm_thrs_state prev_state, new_state; > - int temp = therm->func->temp_get(therm); > + int temp; > + > + if (therm->func->temp_get(therm, &temp) < 0) > + return; > > prev_state = nvkm_therm_sensor_get_threshold_state(therm, thrs_name); > > @@ -166,6 +172,7 @@ alarm_timer_callback(struct nvkm_alarm *alarm) > struct nvbios_therm_sensor *sensor = &therm->bios_sensor; > struct nvkm_timer *tmr = therm->subdev.device->timer; > unsigned long flags; > + int val; > > spin_lock_irqsave(&therm->sensor.alarm_program_lock, flags); > > @@ -185,7 +192,7 @@ alarm_timer_callback(struct nvkm_alarm *alarm) > spin_unlock_irqrestore(&therm->sensor.alarm_program_lock, flags); > > /* schedule the next poll in one second */ > - if (therm->func->temp_get(therm) >= 0) > + if (therm->func->temp_get(therm, &val) >= 0) > nvkm_timer_alarm(tmr, 1000000000ULL, alarm); > } > > @@ -227,9 +234,10 @@ nvkm_therm_sensor_fini(struct nvkm_therm *therm, bool > suspend) > void > nvkm_therm_sensor_preinit(struct nvkm_therm *therm) > { > + int val; > const char *sensor_avail = "yes"; > > - if (therm->func->temp_get(therm) < 0) > + if (therm->func->temp_get(therm, &val) < 0) > sensor_avail = "no"; > > nvkm_debug(&therm->subdev, "internal sensor: %s\n", sensor_avail); > -- > 2.14.1 > > _______________________________________________ > Nouveau mailing list > Nouveau@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/nouveau
signature.asc
Description: PGP signature
_______________________________________________ Nouveau mailing list Nouveau@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/nouveau