S.M.A.R.T. temperature sensors have been supported for
years by userspace tools such as smartmontools.

The temperature readout is however also a good fit for
Linux' hwmon subsystem. By adding a hwmon interface to dig
out SMART parameter 194, we can expose the drive temperature
as a standard hwmon sensor.

The idea came about when experimenting with NAS enclosures
that lack their own on-board sensors but instead piggy-back
the sensor found in the harddrive, if any, to decide on a
policy for driving the on-board fan.

The kernel thermal subsystem supports defining a thermal
policy for the enclosure using the device tree, see e.g.:
arch/arm/boot/dts/gemini-dlink-dns-313.dts
but this requires a proper hwmon sensor integrated with
the kernel.

This is a first attempt at providing a kernel-internal
hwmon sensor for ATA drives. It is possible to do the
same for SCSI, NVME etc, but their protocols and
peculiarities seem to require a per-subsystem implementation.
They would all end up in the same namespace using the
SCSI name such as "sd_0:0:0:0".

With this driver, the hard disk temperature can be read from
sysfs:

 > cd /sys/class/hwmon/hwmon0/
 > cat temp1_input
 38

This likely means that they can also be handled by
userspace tools such as lm_sensors in a uniform way
without need for any special tools such as "hddtemp"
(which seems dormant) though I haven't tested it.

This driver does not block any simultaneous use of
other SMART userspace tools, it's a both/and approach,
not either/or.

Signed-off-by: Linus Walleij <linus.wall...@linaro.org>
---
This is just me having some idea, so I wanted to toss it
out there in case people think it is useful. If you want
to kill the idea right now before I get any further with
it, this is the time to pitch in. You can also say if
you like it.

I included the smartmontools mailing list on the review,
it seemed relevant.
---
 drivers/ata/Kconfig        |  13 ++
 drivers/ata/Makefile       |   1 +
 drivers/ata/libata-hwmon.c | 420 +++++++++++++++++++++++++++++++++++++
 drivers/ata/libata-hwmon.h |  15 ++
 drivers/ata/libata-scsi.c  |   2 +
 5 files changed, 451 insertions(+)
 create mode 100644 drivers/ata/libata-hwmon.c
 create mode 100644 drivers/ata/libata-hwmon.h

diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index 2b16e7c8fff3..8349101c7e53 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -59,6 +59,19 @@ config ATA_ACPI
          You can disable this at kernel boot time by using the
          option libata.noacpi=1
 
+config ATA_HWMON
+       bool "ATA S.M.A.R.T. HWMON support"
+       depends on HWMON
+       help
+         This option compiles in code to support temperature reading
+         from an ATA device using the S.M.A.R.T. (Self-Monitoring,
+         Analysis and Reporting Technology) support for temperature
+         sensors found in some hard drives. The drive will be probed
+         to figure out if it has a temperature sensor, and if it does
+         the kernel hardware monitor framework will be utilized to
+         interact with the sensor. This works orthogonally to any userspace
+         S.M.A.R.T. access tools.
+
 config SATA_ZPODD
        bool "SATA Zero Power Optical Disc Drive (ZPODD) support"
        depends on ATA_ACPI && PM
diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile
index d21cdd83f7ab..7a22b27c66c0 100644
--- a/drivers/ata/Makefile
+++ b/drivers/ata/Makefile
@@ -126,3 +126,4 @@ libata-$(CONFIG_ATA_SFF)    += libata-sff.o
 libata-$(CONFIG_SATA_PMP)      += libata-pmp.o
 libata-$(CONFIG_ATA_ACPI)      += libata-acpi.o
 libata-$(CONFIG_SATA_ZPODD)    += libata-zpodd.o
+libata-$(CONFIG_ATA_HWMON)     += libata-hwmon.o
diff --git a/drivers/ata/libata-hwmon.c b/drivers/ata/libata-hwmon.c
new file mode 100644
index 000000000000..fa1e4e472625
--- /dev/null
+++ b/drivers/ata/libata-hwmon.c
@@ -0,0 +1,420 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hwmon client for ATA S.M.A.R.T. hard disk drivers
+ * (C) 2018 Linus Walleij
+ *
+ * This code is based on know-how and examples from the
+ * smartmontools by Bruce Allen, Christian Franke et al.
+ * (C) 2002-2018
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/hwmon.h>
+#include <linux/ata.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_cmnd.h>
+
+#include "libata-hwmon.h"
+
+/*
+ * Number of attribute entries scanned in the SMART value table. Each
+ * entry is 12 bytes long, the first one starting at byte offset 2 of
+ * the sector returned by the drive.
+ */
+#define ATA_MAX_SMART_ATTRS 30
+/* ID of the SMART attribute that carries the drive temperature */
+#define SMART_TEMP_PROP_194 194
+
+/*
+ * Known layouts of the six raw bytes of the temperature attribute.
+ * The enumerator names spell out the byte order: TT is the current
+ * temperature, LL and HH are the min and max values, CC is an over
+ * temperature count, XX is a filler byte (00/ff) and 00 is a zero byte.
+ * ATA_TEMP_FMT_UNKNOWN marks a layout that has not been detected yet.
+ */
+enum ata_temp_format {
+       ATA_TEMP_FMT_TT_XX_00_00_00_00,
+       ATA_TEMP_FMT_TT_XX_LL_HH_00_00,
+       ATA_TEMP_FMT_TT_LL_HH_00_00_00,
+       ATA_TEMP_FMT_TT_XX_LL_XX_HH_XX,
+       ATA_TEMP_FMT_TT_XX_HH_XX_LL_XX,
+       ATA_TEMP_FMT_TT_XX_LL_HH_CC_CC,
+       ATA_TEMP_FMT_UNKNOWN,
+};
+
+/**
+ * struct ata_hwmon - device instance state
+ * @dev: generic device embedded in the SCSI device, used for log
+ *       messages and device-managed allocations
+ * @hwmon_dev: associated hwmon device
+ * @sdev: SCSI device the SMART read commands are issued to
+ * @tfmt: layout of the raw temperature bytes; starts out as
+ *        ATA_TEMP_FMT_UNKNOWN and is filled in by the first read that
+ *        manages to detect the format
+ */
+struct ata_hwmon {
+       struct device *dev;
+       struct device *hwmon_dev;
+       struct scsi_device *sdev;
+       enum ata_temp_format tfmt;
+};
+
+/*
+ * Report the access mode of a hwmon attribute: the temperature input,
+ * min and max attributes are readable by everyone, anything else
+ * yields mode 0.
+ */
+static umode_t ata_hwmon_is_visible(const void *data,
+                                   enum hwmon_sensor_types type,
+                                   u32 attr, int channel)
+{
+       /* Only temperature channels are offered */
+       if (type != hwmon_temp)
+               return 0;
+
+       if (attr == hwmon_temp_input ||
+           attr == hwmon_temp_min ||
+           attr == hwmon_temp_max)
+               return S_IRUGO;
+
+       return 0;
+}
+
+/*
+ * Classify a 16-bit word as a plausible temperature encoding.
+ *
+ * Returns a bit mask: 0x01 is set when the word fits a signed byte,
+ * 0x10 when it fits a signed word holding a small value, 0x11 when it
+ * is non-negative and fits both, and 0x00 when it fits neither.
+ */
+static int check_temp_word(u16 word)
+{
+       /* < 0, signed word */
+       if (word > 0xff80)
+               return 0x10;
+       /* Too large for a byte, too small for a negative word */
+       if (word > 0xff)
+               return 0x00;
+       /* < 0, signed byte */
+       if (word > 0x7f)
+               return 0x01;
+       /* >= 0, signed byte or word */
+       return 0x11;
+}
+
+/*
+ * Check whether the bytes t1 and t2, read as signed values, form a
+ * believable min/max pair around the current temperature t.
+ *
+ * The pair is accepted when -60 <= lo <= t <= hi <= 120, except for
+ * the combination lo == -1 with hi <= 0.
+ */
+static bool ata_check_temp_range(int t, u8 t1, u8 t2)
+{
+       int lo = (s8)t1;
+       int hi = (s8)t2;
+
+       /* This is obviously wrong */
+       if (lo > hi)
+               return false;
+
+       /* Limits outside the plausible window */
+       if (lo < -60 || hi > 120)
+               return false;
+
+       /* Current temperature must lie between the limits */
+       if (t < lo || t > hi)
+               return false;
+
+       /* Reject lo == -1 together with hi <= 0 */
+       if (lo == -1 && hi <= 0)
+               return false;
+
+       return true;
+}
+
+/*
+ * Work out which byte layout the drive uses for the raw value of the
+ * temperature attribute and store the result in ata->tfmt.
+ *
+ * @ata: instance whose tfmt member is updated on success
+ * @raw: the six raw bytes of the temperature attribute
+ *
+ * Returns 0 when a known layout matched, -EINVAL otherwise (ata->tfmt
+ * is left untouched in that case).
+ */
+static int ata_hwmon_detect_tempformat(struct ata_hwmon *ata, u8 *raw)
+{
+       s8 t;
+       u16 w0, w1, w2;
+       int ctw0;
+
+       /*
+        * Interpret the RAW temperature data:
+        * raw[0] is the temperature given as a signed byte on all known
+        * drives
+        *
+        * Search for possible min/max values
+        * This algorithm is a modified version from the smartmontools.
+        *
+        * [0][1][2][3][4][5] raw[]
+        * [ 0 ] [ 1 ] [ 2 ] word[]
+        * TT xx LL xx HH xx  Hitachi/HGST
+        * TT xx HH xx LL xx  Kingston SSDs
+        * TT xx LL HH 00 00  Maxtor, Samsung, Seagate, Toshiba
+        * TT LL HH 00 00 00  WDC
+        * TT xx LL HH CC CC  WDC, CCCC=over temperature count
+        * (xx = 00/ff, possibly sign extension of lower byte)
+        *
+        * TODO: detect the 10x temperatures found on some Samsung
+        * drives. struct scsi_device contains manufacturer and model
+        * information.
+        */
+
+       /*
+        * Build the little-endian 16-bit words. The high byte has to be
+        * shifted by 8; a shift by 16 would be cut off by the u16
+        * assignment and leave only the low byte in each word.
+        */
+       w0 = raw[0] | raw[1] << 8;
+       w1 = raw[2] | raw[3] << 8;
+       w2 = raw[4] | raw[5] << 8;
+       t = (s8)raw[0];
+
+       /* If this is != 0, then w0 may contain something useful */
+       ctw0 = check_temp_word(w0);
+
+       /* This checks variants with zero in [4] [5] */
+       if (!w2) {
+               /* TT xx 00 00 00 00 */
+               if (!w1 && ctw0)
+                       ata->tfmt = ATA_TEMP_FMT_TT_XX_00_00_00_00;
+               /* TT xx LL HH 00 00 */
+               else if (ctw0 &&
+                        ata_check_temp_range(t, raw[2], raw[3]))
+                       ata->tfmt = ATA_TEMP_FMT_TT_XX_LL_HH_00_00;
+               /* TT LL HH 00 00 00 */
+               else if (!raw[3] &&
+                        ata_check_temp_range(t, raw[1], raw[2]))
+                       ata->tfmt = ATA_TEMP_FMT_TT_LL_HH_00_00_00;
+               else
+                       return -EINVAL;
+       } else if (ctw0) {
+               /*
+                * TT xx LL xx HH xx
+                * What the expression below does is to check that each word
+                * formed by [0][1], [2][3], and [4][5] is something little-
+                * endian s8 or s16 that could be meaningful.
+                */
+               if ((ctw0 & check_temp_word(w1) & check_temp_word(w2)) != 0x00) {
+                       if (ata_check_temp_range(t, raw[2], raw[4]))
+                               ata->tfmt = ATA_TEMP_FMT_TT_XX_LL_XX_HH_XX;
+                       else if (ata_check_temp_range(t, raw[4], raw[2]))
+                               ata->tfmt = ATA_TEMP_FMT_TT_XX_HH_XX_LL_XX;
+                       else
+                               return -EINVAL;
+               /*
+                * TT xx LL HH CC CC
+                * Make sure the CC CC word is at least not negative, and that
+                * the max temperature is something >= 40, then it is probably
+                * the right format.
+                */
+               } else if (w2 < 0x7fff) {
+                       if (ata_check_temp_range(t, raw[2], raw[3]) &&
+                           raw[3] >= 40)
+                               ata->tfmt = ATA_TEMP_FMT_TT_XX_LL_HH_CC_CC;
+                       else
+                               return -EINVAL;
+               } else {
+                       return -EINVAL;
+               }
+       } else {
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * Pick the current, min and max temperature out of the raw attribute
+ * bytes according to the layout stored in ata->tfmt.
+ *
+ * The current temperature always comes from raw[0]. Layouts that carry
+ * no min/max bytes, and the unknown layout, report 0 for both.
+ */
+static void ata_hwmon_convert_temperatures(struct ata_hwmon *ata, u8 *raw,
+                                          int *t, int *lo, int *hi)
+{
+       *t = (s8)raw[0];
+
+       switch (ata->tfmt) {
+       case ATA_TEMP_FMT_TT_XX_LL_HH_00_00:
+       case ATA_TEMP_FMT_TT_XX_LL_HH_CC_CC:
+               /* Min and max sit next to each other in [2] and [3] */
+               *lo = (s8)raw[2];
+               *hi = (s8)raw[3];
+               break;
+       case ATA_TEMP_FMT_TT_LL_HH_00_00_00:
+               *lo = (s8)raw[1];
+               *hi = (s8)raw[2];
+               break;
+       case ATA_TEMP_FMT_TT_XX_LL_XX_HH_XX:
+               *lo = (s8)raw[2];
+               *hi = (s8)raw[4];
+               break;
+       case ATA_TEMP_FMT_TT_XX_HH_XX_LL_XX:
+               *lo = (s8)raw[4];
+               *hi = (s8)raw[2];
+               break;
+       case ATA_TEMP_FMT_TT_XX_00_00_00_00:
+       case ATA_TEMP_FMT_UNKNOWN:
+               /* No min/max information available */
+               *lo = 0;
+               *hi = 0;
+               break;
+       }
+}
+
+/*
+ * Read the SMART value table from the drive and extract the
+ * temperature attribute from it.
+ *
+ * @ata: instance to read from; ata->tfmt is filled in on the first
+ *       successful format detection
+ * @temp: receives the current temperature in degrees Celsius
+ * @min: receives the min value found in the raw attribute bytes
+ * @max: receives the max value found in the raw attribute bytes
+ *
+ * Returns 0 on success, -ENOMEM if the sector buffer cannot be
+ * allocated, -EIO if the command fails or the table checksum is wrong,
+ * and -ENOTSUPP if the table has no temperature attribute or its raw
+ * format cannot be determined.
+ */
+static int ata_hwmon_read_temp(struct ata_hwmon *ata, int *temp,
+                              int *min, int *max)
+{
+       u8 scsi_cmd[MAX_COMMAND_SIZE];
+       int cmd_result;
+       u8 *argbuf = NULL;
+       struct scsi_sense_hdr sshdr;
+       u8 raw[6];
+       int ret;
+       u8 csum;
+       int i;
+
+       /* Send ATA command to read SMART values */
+       memset(scsi_cmd, 0, sizeof(scsi_cmd));
+       scsi_cmd[0] = ATA_16;
+       scsi_cmd[1] = (4 << 1); /* PIO Data-in */
+       /*
+        * No off.line or cc, read from dev, block count in sector count
+        * field.
+        */
+       scsi_cmd[2] = 0x0e;
+       scsi_cmd[4] = ATA_SMART_READ_VALUES;
+       scsi_cmd[6] = 1; /* Read 1 sector */
+       scsi_cmd[8] = 0; /* args[1]; */
+       scsi_cmd[10] = ATA_SMART_LBAM_PASS;
+       scsi_cmd[12] = ATA_SMART_LBAH_PASS;
+       scsi_cmd[14] = ATA_CMD_SMART;
+
+       argbuf = kmalloc(ATA_SECT_SIZE, GFP_KERNEL);
+       /* Nothing to clean up yet, so bail out directly */
+       if (!argbuf)
+               return -ENOMEM;
+
+       cmd_result = scsi_execute(ata->sdev, scsi_cmd, DMA_FROM_DEVICE,
+                                 argbuf, ATA_SECT_SIZE,
+                                 NULL, &sshdr, (10*HZ), 5, 0, 0, NULL);
+       if (cmd_result) {
+               dev_err(ata->dev, "error %d reading SMART values from device\n",
+                       cmd_result);
+               ret = -EIO;
+               goto freebuf;
+       }
+
+       /* Checksum the read value table: all bytes must sum up to zero */
+       csum = 0;
+       for (i = 0; i < ATA_SECT_SIZE; i++)
+               csum += argbuf[i];
+       if (csum) {
+               dev_err(ata->dev, "checksum error reading SMART values\n");
+               ret = -EIO;
+               goto freebuf;
+       }
+
+       /* Loop over SMART attributes */
+       for (i = 0; i < ATA_MAX_SMART_ATTRS; i++) {
+               /* Each entry is 12 bytes, the table starts at offset 2 */
+               const u8 *entry = &argbuf[2 + i * 12];
+               u8 id;
+               u16 flags;
+               u8 curr;
+               u8 worst;
+               int j;
+
+               id = entry[0];
+               /* Unused slot */
+               if (!id)
+                       continue;
+
+               /* Little-endian 16-bit flags: high byte shifted by 8 */
+               flags = entry[1] | (entry[2] << 8);
+               /* Highest temperature since boot */
+               curr = entry[3];
+               /* Highest temperature ever */
+               worst = entry[4];
+               for (j = 0; j < 6; j++)
+                       raw[j] = entry[5 + j];
+               dev_dbg(ata->dev, "ID: %d, FLAGS: %04x, current %d, worst %d, "
+                       "RAW %02x %02x %02x %02x %02x %02x\n",
+                       id, flags, curr, worst,
+                       raw[0], raw[1], raw[2], raw[3], raw[4], raw[5]);
+
+               /* raw[] now holds the temperature attribute, stop here */
+               if (id == SMART_TEMP_PROP_194)
+                       break;
+       }
+       /* Ran through the whole table without finding the attribute */
+       if (i == ATA_MAX_SMART_ATTRS) {
+               ret = -ENOTSUPP;
+               goto freebuf;
+       }
+
+       /* Detect the raw format once, later reads reuse the result */
+       if (ata->tfmt == ATA_TEMP_FMT_UNKNOWN) {
+               ret = ata_hwmon_detect_tempformat(ata, raw);
+               if (ret) {
+                       dev_err(ata->dev,
+                               "unable to determine temperature format\n");
+                       ret = -ENOTSUPP;
+                       goto freebuf;
+               }
+       }
+
+       ata_hwmon_convert_temperatures(ata, raw, temp, min, max);
+       dev_dbg(ata->dev, "temp = %d, min = %d, max = %d\n",
+               *temp, *min, *max);
+
+       ret = 0;
+
+freebuf:
+       kfree(argbuf);
+       return ret;
+}
+
+/*
+ * hwmon read callback: fetch the temperature values from the drive and
+ * hand back the one selected by @attr.
+ *
+ * The drive reports whole degrees Celsius while the hwmon sysfs ABI
+ * expresses temperatures in millidegrees Celsius, so the value is
+ * scaled by 1000 before it is stored in @val.
+ *
+ * Returns 0 on success, -EINVAL for a sensor type or attribute that is
+ * not handled here, or the error from ata_hwmon_read_temp().
+ */
+static int ata_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
+                         u32 attr, int channel, long *val)
+{
+       struct ata_hwmon *ata = dev_get_drvdata(dev);
+       int temp, min, max;
+       int ret;
+
+       if (type != hwmon_temp)
+               return -EINVAL;
+
+       ret = ata_hwmon_read_temp(ata, &temp, &min, &max);
+       if (ret)
+               return ret;
+
+       /* Convert degrees Celsius to the millidegrees hwmon expects */
+       switch (attr) {
+       case hwmon_temp_input:
+               *val = temp * 1000;
+               break;
+       case hwmon_temp_min:
+               *val = min * 1000;
+               break;
+       case hwmon_temp_max:
+               *val = max * 1000;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/* Callbacks of this driver: attribute visibility and value readout */
+static const struct hwmon_ops ata_hwmon_ops = {
+       .is_visible = ata_hwmon_is_visible,
+       .read = ata_hwmon_read,
+};
+
+/* One temperature channel with input, min and max; 0 ends the list */
+static const u32 ata_hwmon_temp_config[] = {
+       HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_MAX,
+       0,
+};
+
+/* Ties the channel configuration above to the temperature sensor type */
+static const struct hwmon_channel_info ata_hwmon_temp = {
+       .type = hwmon_temp,
+       .config = ata_hwmon_temp_config,
+};
+
+/* NULL-terminated list of all sensor types offered by this driver */
+static const struct hwmon_channel_info *ata_hwmon_info[] = {
+       &ata_hwmon_temp,
+       NULL,
+};
+
+/* Chip description passed to the hwmon core when registering */
+static const struct hwmon_chip_info ata_hwmon_devinfo = {
+       .ops = &ata_hwmon_ops,
+       .info = ata_hwmon_info,
+};
+
+/**
+ * ata_hwmon_probe - try to register a hwmon temperature sensor for a disk
+ * @sdev: SCSI device to probe
+ *
+ * Performs one trial temperature read. If that read reports -ENOTSUPP
+ * the device is silently skipped, otherwise a hwmon device named after
+ * the SCSI device is registered.
+ *
+ * Returns 0 if a sensor was registered or the device was skipped, and
+ * a negative error code on any other failure.
+ */
+int ata_hwmon_probe(struct scsi_device *sdev)
+{
+       struct device *dev = &sdev->sdev_gendev;
+       struct ata_hwmon *ata;
+       char *hwmon_name;
+       int temp;
+       int unused;
+       int err;
+
+       ata = devm_kzalloc(dev, sizeof(*ata), GFP_KERNEL);
+       if (!ata)
+               return -ENOMEM;
+
+       ata->sdev = sdev;
+       ata->dev = dev;
+       /* The format gets detected by the trial read below */
+       ata->tfmt = ATA_TEMP_FMT_UNKNOWN;
+
+       err = ata_hwmon_read_temp(ata, &temp, &unused, &unused);
+       /*
+        * If temperature reading is not supported in the SMART
+        * properties, we just bail out.
+        */
+       if (err == -ENOTSUPP)
+               return 0;
+       /* Any other error, return upward */
+       if (err)
+               return err;
+       dev_info(dev, "initial temperature %d degrees celsius\n", temp);
+
+       /* Names the hwmon device something like "sd_0:0:0:0" */
+       hwmon_name = devm_kasprintf(dev, GFP_KERNEL, "sd_%s", dev_name(dev));
+       if (!hwmon_name)
+               return -ENOMEM;
+
+       ata->hwmon_dev = devm_hwmon_device_register_with_info(dev, hwmon_name,
+                                                             ata,
+                                                             &ata_hwmon_devinfo,
+                                                             NULL);
+       if (IS_ERR(ata->hwmon_dev))
+               return PTR_ERR(ata->hwmon_dev);
+
+       dev_info(dev, "added hwmon sensor %s\n", hwmon_name);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(ata_hwmon_probe);
diff --git a/drivers/ata/libata-hwmon.h b/drivers/ata/libata-hwmon.h
new file mode 100644
index 000000000000..df56ba456345
--- /dev/null
+++ b/drivers/ata/libata-hwmon.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Interface of the ATA S.M.A.R.T. hwmon support. When CONFIG_ATA_HWMON
+ * is disabled, ata_hwmon_probe() becomes a stub that returns 0.
+ */
+#ifndef __LIBATA_HWMON_H__
+#define __LIBATA_HWMON_H__
+
+#include <scsi/scsi_device.h>
+
+#ifdef CONFIG_ATA_HWMON
+
+int ata_hwmon_probe(struct scsi_device *sdev);
+
+#else
+
+/* No hwmon support built in: nothing to probe, report success */
+static inline int ata_hwmon_probe(struct scsi_device *sdev)
+{
+       return 0;
+}
+
+#endif
+
+#endif /* __LIBATA_HWMON_H__ */
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 55b890d19780..a83075e4d3b3 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -54,6 +54,7 @@
 
 #include "libata.h"
 #include "libata-transport.h"
+#include "libata-hwmon.h"
 
 #define ATA_SCSI_RBUF_SIZE     4096
 
@@ -4594,6 +4595,7 @@ void ata_scsi_scan_host(struct ata_port *ap, int sync)
                        if (!IS_ERR(sdev)) {
                                dev->sdev = sdev;
                                scsi_device_put(sdev);
+                               ata_hwmon_probe(sdev);
                        } else {
                                dev->sdev = NULL;
                        }
-- 
2.17.1

Reply via email to