On Thu, Jan 04, 2018 at 12:32:04PM +0530, Viresh Kumar wrote:
> This implements the debugfs interface for thermal cooling devices and
> exposes some pretty useful statistics. These statistics have proven to
> be quite useful specially while doing benchmarks related to the task
> scheduler, where we want to make sure that nothing has disrupted the
> test, specially the cooling device which may have put constraints on the
> CPUs. The information exposed here tells us to what extent the CPUs were
> constrained by the thermal framework.
> 
> The read-only "transitions" file is per cooling device and it shows the
> total number of cooling state transitions the device has gone through
> since the time the cooling device is registered or the time when
> statistics were reset last.
> 
> The read-only "time_in_state/stateN" file is per cooling state and it
> shows the time spent by the device in the respective cooling state.
> 
> The write-only "reset" file is used to reset the statistics.
> 
> This is how the directory structure looks like for a single cooling
> device:
> 
> $ ls -R /sys/kernel/debug/thermal/
> /sys/kernel/debug/thermal/:
> cooling_device0
> 
> /sys/kernel/debug/thermal/cooling_device0:
> reset  time_in_state_ms  transitions
> 
> /sys/kernel/debug/thermal/cooling_device0/time_in_state_ms:
> state0  state1  state2  state3

I would prefer this to go into sysfs. Reason is mainly because
such stats are also useful on production systems, specially for
collecting how a policy / deployment behaves across production
population.

Cheers,

> 
> This is tested on ARM 32-bit Hisilicon hikey620 board running Ubuntu and
> ARM 64-bit Hisilicon hikey960 board running Android.
> 
> Signed-off-by: Viresh Kumar <[email protected]>
> ---
>  drivers/thermal/Makefile          |   1 +
>  drivers/thermal/thermal_core.c    |   6 ++
>  drivers/thermal/thermal_core.h    |  18 ++++
>  drivers/thermal/thermal_debugfs.c | 167 
> ++++++++++++++++++++++++++++++++++++++
>  drivers/thermal/thermal_helpers.c |   5 +-
>  drivers/thermal/thermal_sysfs.c   |   1 +
>  include/linux/thermal.h           |   1 +
>  7 files changed, 198 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/thermal/thermal_debugfs.c
> 
> diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
> index 610344eb3e03..629f74e73871 100644
> --- a/drivers/thermal/Makefile
> +++ b/drivers/thermal/Makefile
> @@ -6,6 +6,7 @@
>  obj-$(CONFIG_THERMAL)                += thermal_sys.o
>  thermal_sys-y                        += thermal_core.o thermal_sysfs.o \
>                                       thermal_helpers.o
> +obj-$(CONFIG_DEBUG_FS)               += thermal_debugfs.o
>  
>  # interface to/from other layers providing sensors
>  thermal_sys-$(CONFIG_THERMAL_HWMON)          += thermal_hwmon.o
> diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
> index 2b1b0ba393a4..bcc34648580f 100644
> --- a/drivers/thermal/thermal_core.c
> +++ b/drivers/thermal/thermal_core.c
> @@ -997,6 +997,8 @@ __thermal_cooling_device_register(struct device_node *np,
>                                                  THERMAL_EVENT_UNSPECIFIED);
>       mutex_unlock(&thermal_list_lock);
>  
> +     thermal_cooling_device_setup_debugfs(cdev);
> +
>       return cdev;
>  }
>  
> @@ -1104,6 +1106,7 @@ void thermal_cooling_device_unregister(struct 
> thermal_cooling_device *cdev)
>  
>       mutex_unlock(&thermal_list_lock);
>  
> +     thermal_cooling_device_remove_debugfs(cdev);
>       ida_simple_remove(&thermal_cdev_ida, cdev->id);
>       device_unregister(&cdev->device);
>  }
> @@ -1544,6 +1547,8 @@ static int __init thermal_init(void)
>               pr_warn("Thermal: Can not register suspend notifier, return 
> %d\n",
>                       result);
>  
> +     thermal_debugfs_register();
> +
>       return 0;
>  
>  exit_netlink:
> @@ -1563,6 +1568,7 @@ static int __init thermal_init(void)
>  
>  static void __exit thermal_exit(void)
>  {
> +     thermal_debugfs_unregister();
>       unregister_pm_notifier(&thermal_pm_nb);
>       of_thermal_destroy_zones();
>       genetlink_exit();
> diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
> index 27e3b1df7360..3a8d50aa32dc 100644
> --- a/drivers/thermal/thermal_core.h
> +++ b/drivers/thermal/thermal_core.h
> @@ -151,4 +151,22 @@ of_thermal_get_trip_points(struct thermal_zone_device 
> *tz)
>  }
>  #endif
>  
> +#ifdef CONFIG_DEBUG_FS
> +void thermal_debugfs_register(void);
> +void thermal_debugfs_unregister(void);
> +void thermal_cooling_device_setup_debugfs(struct thermal_cooling_device 
> *cdev);
> +void thermal_cooling_device_remove_debugfs(struct thermal_cooling_device 
> *cdev);
> +void thermal_cooling_device_debugfs_update(struct thermal_cooling_device 
> *cdev,
> +                                        unsigned long new_state);
> +#else
> +static inline void thermal_debugfs_register(void) {}
> +static inline void thermal_debugfs_unregister(void) {}
> +static inline void
> +thermal_cooling_device_setup_debugfs(struct thermal_cooling_device *cdev) {}
> +static inline void
> +thermal_cooling_device_remove_debugfs(struct thermal_cooling_device *cdev) {}
> +static inline void
> +thermal_cooling_device_debugfs_update(struct thermal_cooling_device *cdev,
> +                                   unsigned long new_state) {}
> +#endif /* debugfs */
>  #endif /* __THERMAL_CORE_H__ */
> diff --git a/drivers/thermal/thermal_debugfs.c 
> b/drivers/thermal/thermal_debugfs.c
> new file mode 100644
> index 000000000000..077684197250
> --- /dev/null
> +++ b/drivers/thermal/thermal_debugfs.c
> @@ -0,0 +1,167 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Manages debugfs interface for thermal devices.
> + *
> + * Copyright (C) 2018 Linaro.
> + * Viresh Kumar <[email protected]>
> + */
> +
> +#include <linux/device.h>
> +#include <linux/debugfs.h>
> +#include <linux/ktime.h>
> +#include <linux/slab.h>
> +#include <linux/spinlock.h>
> +
> +#include "thermal_core.h"
> +
> +struct cooling_time_in_state {
> +     ktime_t time;
> +     struct cooling_dev_debugfs_data *data;
> +};
> +
> +struct cooling_dev_debugfs_data {
> +     struct dentry *dentry;
> +     unsigned int total_trans;
> +     unsigned long state;
> +     unsigned long max_states;
> +     ktime_t last_time;
> +     struct cooling_time_in_state *time_in_state;
> +};
> +
> +static struct dentry *rootdir;
> +static DEFINE_SPINLOCK(lock);
> +
> +static void update_time_in_state(struct cooling_dev_debugfs_data *data)
> +{
> +     ktime_t now = ktime_get(), delta;
> +
> +     delta = ktime_sub(now, data->last_time);
> +     data->time_in_state[data->state].time =
> +             ktime_add(data->time_in_state[data->state].time, delta);
> +     data->last_time = now;
> +}
> +
> +void thermal_cooling_device_debugfs_update(struct thermal_cooling_device 
> *cdev,
> +                                        unsigned long new_state)
> +{
> +     struct cooling_dev_debugfs_data *data = cdev->debugfs;
> +
> +     if (!data)
> +             return;
> +
> +     spin_lock(&lock);
> +     update_time_in_state(data);
> +     data->state = new_state;
> +     data->total_trans++;
> +     spin_unlock(&lock);
> +}
> +
> +static int read_time_in_state(void *ptr, u64 *val)
> +{
> +     struct cooling_time_in_state *time_in_state = ptr;
> +
> +     spin_lock(&lock);
> +     update_time_in_state(time_in_state->data);
> +     *val = ktime_to_ms(time_in_state->time);
> +     spin_unlock(&lock);
> +
> +     return 0;
> +}
> +
> +DEFINE_DEBUGFS_ATTRIBUTE(time_in_state_fops, read_time_in_state, NULL, 
> "%llu\n");
> +
> +static int reset_stats(void *ptr, u64 val)
> +{
> +     struct cooling_dev_debugfs_data *data = ptr;
> +     int i;
> +
> +     spin_lock(&lock);
> +
> +     data->total_trans = 0;
> +     data->last_time = ktime_get();
> +
> +     for (i = 0; i < data->max_states; i++)
> +             data->time_in_state[i].time = ktime_set(0, 0);
> +
> +     spin_unlock(&lock);
> +
> +     return 0;
> +}
> +
> +DEFINE_DEBUGFS_ATTRIBUTE(reset_fops, NULL, reset_stats, "%llu\n");
> +
> +void thermal_cooling_device_setup_debugfs(struct thermal_cooling_device 
> *cdev)
> +{
> +     struct cooling_dev_debugfs_data *data;
> +     struct dentry *dentry;
> +     unsigned long states;
> +     int ret, size, i;
> +     char name[NAME_MAX];
> +
> +     ret = cdev->ops->get_max_state(cdev, &states);
> +     if (ret)
> +             return;
> +
> +     size = sizeof(*data);
> +     size += sizeof(*data->time_in_state) * states;
> +
> +     data = kzalloc(size, GFP_KERNEL);
> +     if (!data)
> +             return;
> +
> +     data->time_in_state = (struct cooling_time_in_state *)(data + 1);
> +     cdev->debugfs = data;
> +     data->last_time = ktime_get();
> +     data->max_states = states;
> +
> +     data->dentry = debugfs_create_dir(dev_name(&cdev->device), rootdir);
> +     if (!data->dentry)
> +             goto free_data;
> +
> +     debugfs_create_file_unsafe("reset", 0666, data->dentry, data, 
> &reset_fops);
> +     debugfs_create_u32("transitions", 0444, data->dentry,
> +                        &data->total_trans);
> +
> +     dentry = debugfs_create_dir("time_in_state_ms", data->dentry);
> +     if (!dentry)
> +             goto free_data_dentry;
> +
> +     for (i = 0; i < states; i++) {
> +             data->time_in_state[i].data = data;
> +             snprintf(name, NAME_MAX, "state%u", i);
> +             debugfs_create_file_unsafe(name, 0444, dentry,
> +                                        &data->time_in_state[i],
> +                                        &time_in_state_fops);
> +     }
> +
> +     return;
> +
> +free_data_dentry:
> +     debugfs_remove_recursive(data->dentry);
> +free_data:
> +     kfree(data);
> +     cdev->debugfs = NULL;
> +}
> +
> +void thermal_cooling_device_remove_debugfs(struct thermal_cooling_device 
> *cdev)
> +{
> +     struct cooling_dev_debugfs_data *data = cdev->debugfs;
> +
> +     if (!data)
> +             return;
> +
> +     debugfs_remove_recursive(data->dentry);
> +     kfree(data);
> +     cdev->debugfs = NULL;
> +}
> +
> +void thermal_debugfs_register(void)
> +{
> +     rootdir = debugfs_create_dir("thermal", NULL);
> +}
> +
> +void thermal_debugfs_unregister(void)
> +{
> +     debugfs_remove_recursive(rootdir);
> +     rootdir = NULL;
> +}
> diff --git a/drivers/thermal/thermal_helpers.c 
> b/drivers/thermal/thermal_helpers.c
> index 8cdf75adcce1..8cfb2f4e7ea3 100644
> --- a/drivers/thermal/thermal_helpers.c
> +++ b/drivers/thermal/thermal_helpers.c
> @@ -187,7 +187,10 @@ void thermal_cdev_update(struct thermal_cooling_device 
> *cdev)
>               if (instance->target > target)
>                       target = instance->target;
>       }
> -     cdev->ops->set_cur_state(cdev, target);
> +
> +     if (!cdev->ops->set_cur_state(cdev, target))
> +             thermal_cooling_device_debugfs_update(cdev, target);
> +
>       cdev->updated = true;
>       mutex_unlock(&cdev->lock);
>       trace_cdev_update(cdev, target);
> diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
> index fb80c96d8f73..be997ce3f32d 100644
> --- a/drivers/thermal/thermal_sysfs.c
> +++ b/drivers/thermal/thermal_sysfs.c
> @@ -722,6 +722,7 @@ thermal_cooling_device_cur_state_store(struct device *dev,
>       result = cdev->ops->set_cur_state(cdev, state);
>       if (result)
>               return result;
> +     thermal_cooling_device_debugfs_update(cdev, state);
>       return count;
>  }
>  
> diff --git a/include/linux/thermal.h b/include/linux/thermal.h
> index 8c5302374eaa..338d165cefef 100644
> --- a/include/linux/thermal.h
> +++ b/include/linux/thermal.h
> @@ -148,6 +148,7 @@ struct thermal_cooling_device {
>       struct device device;
>       struct device_node *np;
>       void *devdata;
> +     void *debugfs;
>       const struct thermal_cooling_device_ops *ops;
>       bool updated; /* true if the cooling device does not need update */
>       struct mutex lock; /* protect thermal_instances list */
> -- 
> 2.15.0.194.g9af6a3dea062
> 

Reply via email to