Re: [Patch v3 2/7] sched: Add infrastructure to store and update instantaneous thermal pressure

2019-10-18 Thread Vincent Guittot
Hi Thara,

On Thu, 17 Oct 2019 at 18:40, Thara Gopinath  wrote:
>
> On 10/17/2019 04:44 AM, Vincent Guittot wrote:
> > Hi Thara,
> >
> > On Wed, 16 Oct 2019 at 23:22, Thara Gopinath  
> > wrote:
> >>
> >> Hi Vincent,
> >>
> >> Thanks for the review
> >> On 10/14/2019 11:50 AM, Vincent Guittot wrote:
> >>> Hi Thara,
> >>>
> >>> On Mon, 14 Oct 2019 at 02:58, Thara Gopinath  
> >>> wrote:
> 
>  Add thermal.c and thermal.h files that provides interface
>  APIs to initialize, update/average, track, accumulate and decay
>  thermal pressure per cpu basis. A per cpu structure max_capacity_info is
>  introduced to keep track of instantaneous per cpu thermal pressure.
>  Thermal pressure is the delta between max_capacity and cap_capacity.
>  API update_periodic_maxcap is called for periodic accumulate and decay
>  of the thermal pressure. It is to to be called from a periodic tick
>  function. This API calculates the delta between max_capacity and
>  cap_capacity and passes on the delta to update_thermal_avg to do the
>  necessary accumulate, decay and average. API update_maxcap_capacity is 
>  for
>  the system to update the thermal pressure by updating cap_capacity.
>  Considering, update_periodic_maxcap reads cap_capacity and
>  update_maxcap_capacity writes into cap_capacity, one can argue for
>  some sort of locking mechanism to avoid a stale value.
>  But considering update_periodic_maxcap can be called from a system
>  critical path like scheduler tick function, a locking mechanism is not
>  ideal. This means that it is possible the value used to
>  calculate average thermal pressure for a cpu can be stale for upto 1
>  tick period.
> 
>  Signed-off-by: Thara Gopinath 
>  ---
>   include/linux/sched.h  | 14 +++
>   kernel/sched/Makefile  |  2 +-
>   kernel/sched/thermal.c | 66 
>  ++
>   kernel/sched/thermal.h | 13 ++
>   4 files changed, 94 insertions(+), 1 deletion(-)
>   create mode 100644 kernel/sched/thermal.c
>   create mode 100644 kernel/sched/thermal.h
> 
>  diff --git a/include/linux/sched.h b/include/linux/sched.h
>  index 2c2e56b..875ce2b 100644
>  --- a/include/linux/sched.h
>  +++ b/include/linux/sched.h
>  @@ -1983,6 +1983,20 @@ static inline void rseq_syscall(struct pt_regs 
>  *regs)
> 
>   #endif
> 
>  +#ifdef CONFIG_SMP
>  +void update_maxcap_capacity(int cpu, u64 capacity);
>  +
>  +void populate_max_capacity_info(void);
>  +#else
>  +static inline void update_maxcap_capacity(int cpu, u64 capacity)
>  +{
>  +}
>  +
>  +static inline void populate_max_capacity_info(void)
>  +{
>  +}
>  +#endif
>  +
>   const struct sched_avg *sched_trace_cfs_rq_avg(struct cfs_rq *cfs_rq);
>   char *sched_trace_cfs_rq_path(struct cfs_rq *cfs_rq, char *str, int 
>  len);
>   int sched_trace_cfs_rq_cpu(struct cfs_rq *cfs_rq);
>  diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
>  index 21fb5a5..4d3b820 100644
>  --- a/kernel/sched/Makefile
>  +++ b/kernel/sched/Makefile
>  @@ -20,7 +20,7 @@ obj-y += core.o loadavg.o clock.o cputime.o
>   obj-y += idle.o fair.o rt.o deadline.o
>   obj-y += wait.o wait_bit.o swait.o completion.o
> 
>  -obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o 
>  pelt.o
>  +obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o 
>  pelt.o thermal.o
>   obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
>   obj-$(CONFIG_SCHEDSTATS) += stats.o
>   obj-$(CONFIG_SCHED_DEBUG) += debug.o
>  diff --git a/kernel/sched/thermal.c b/kernel/sched/thermal.c
>  new file mode 100644
>  index 000..5f0b2d4
>  --- /dev/null
>  +++ b/kernel/sched/thermal.c
>  @@ -0,0 +1,66 @@
>  +// SPDX-License-Identifier: GPL-2.0
>  +/*
>  + * Sceduler Thermal Interactions
>  + *
>  + *  Copyright (C) 2018 Linaro, Inc., Thara Gopinath 
>  
>  + */
>  +
>  +#include 
>  +#include "sched.h"
>  +#include "pelt.h"
>  +#include "thermal.h"
>  +
>  +struct max_capacity_info {
>  +   unsigned long max_capacity;
>  +   unsigned long cap_capacity;
>  +};
>  +
>  +static DEFINE_PER_CPU(struct max_capacity_info, max_cap);
>  +
>  +void update_maxcap_capacity(int cpu, u64 capacity)
>  +{
>  +   struct max_capacity_info *__max_cap;
>  +   unsigned long __capacity;
>  +
>  +   __max_cap = (_cpu(max_cap, cpu));
>  +   if (!__max_cap) {
>  +   pr_err("no max_capacity_info structure for cpu %d\n", 
>  cpu);
>  +   return;
>  +   }
>  +
>  +   /* Normalize the capacity */
>  +   __capacity = (capacity * 

Re: [Patch v3 2/7] sched: Add infrastructure to store and update instantaneous thermal pressure

2019-10-17 Thread Thara Gopinath
On 10/17/2019 04:44 AM, Vincent Guittot wrote:
> Hi Thara,
> 
> On Wed, 16 Oct 2019 at 23:22, Thara Gopinath  
> wrote:
>>
>> Hi Vincent,
>>
>> Thanks for the review
>> On 10/14/2019 11:50 AM, Vincent Guittot wrote:
>>> Hi Thara,
>>>
>>> On Mon, 14 Oct 2019 at 02:58, Thara Gopinath  
>>> wrote:

 Add thermal.c and thermal.h files that provides interface
 APIs to initialize, update/average, track, accumulate and decay
 thermal pressure per cpu basis. A per cpu structure max_capacity_info is
 introduced to keep track of instantaneous per cpu thermal pressure.
 Thermal pressure is the delta between max_capacity and cap_capacity.
 API update_periodic_maxcap is called for periodic accumulate and decay
 of the thermal pressure. It is to to be called from a periodic tick
 function. This API calculates the delta between max_capacity and
 cap_capacity and passes on the delta to update_thermal_avg to do the
 necessary accumulate, decay and average. API update_maxcap_capacity is for
 the system to update the thermal pressure by updating cap_capacity.
 Considering, update_periodic_maxcap reads cap_capacity and
 update_maxcap_capacity writes into cap_capacity, one can argue for
 some sort of locking mechanism to avoid a stale value.
 But considering update_periodic_maxcap can be called from a system
 critical path like scheduler tick function, a locking mechanism is not
 ideal. This means that it is possible the value used to
 calculate average thermal pressure for a cpu can be stale for upto 1
 tick period.

 Signed-off-by: Thara Gopinath 
 ---
  include/linux/sched.h  | 14 +++
  kernel/sched/Makefile  |  2 +-
  kernel/sched/thermal.c | 66 
 ++
  kernel/sched/thermal.h | 13 ++
  4 files changed, 94 insertions(+), 1 deletion(-)
  create mode 100644 kernel/sched/thermal.c
  create mode 100644 kernel/sched/thermal.h

 diff --git a/include/linux/sched.h b/include/linux/sched.h
 index 2c2e56b..875ce2b 100644
 --- a/include/linux/sched.h
 +++ b/include/linux/sched.h
 @@ -1983,6 +1983,20 @@ static inline void rseq_syscall(struct pt_regs 
 *regs)

  #endif

 +#ifdef CONFIG_SMP
 +void update_maxcap_capacity(int cpu, u64 capacity);
 +
 +void populate_max_capacity_info(void);
 +#else
 +static inline void update_maxcap_capacity(int cpu, u64 capacity)
 +{
 +}
 +
 +static inline void populate_max_capacity_info(void)
 +{
 +}
 +#endif
 +
  const struct sched_avg *sched_trace_cfs_rq_avg(struct cfs_rq *cfs_rq);
  char *sched_trace_cfs_rq_path(struct cfs_rq *cfs_rq, char *str, int len);
  int sched_trace_cfs_rq_cpu(struct cfs_rq *cfs_rq);
 diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
 index 21fb5a5..4d3b820 100644
 --- a/kernel/sched/Makefile
 +++ b/kernel/sched/Makefile
 @@ -20,7 +20,7 @@ obj-y += core.o loadavg.o clock.o cputime.o
  obj-y += idle.o fair.o rt.o deadline.o
  obj-y += wait.o wait_bit.o swait.o completion.o

 -obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o
 +obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o 
 thermal.o
  obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
  obj-$(CONFIG_SCHEDSTATS) += stats.o
  obj-$(CONFIG_SCHED_DEBUG) += debug.o
 diff --git a/kernel/sched/thermal.c b/kernel/sched/thermal.c
 new file mode 100644
 index 000..5f0b2d4
 --- /dev/null
 +++ b/kernel/sched/thermal.c
 @@ -0,0 +1,66 @@
 +// SPDX-License-Identifier: GPL-2.0
 +/*
 + * Sceduler Thermal Interactions
 + *
 + *  Copyright (C) 2018 Linaro, Inc., Thara Gopinath 
 
 + */
 +
 +#include 
 +#include "sched.h"
 +#include "pelt.h"
 +#include "thermal.h"
 +
 +struct max_capacity_info {
 +   unsigned long max_capacity;
 +   unsigned long cap_capacity;
 +};
 +
 +static DEFINE_PER_CPU(struct max_capacity_info, max_cap);
 +
 +void update_maxcap_capacity(int cpu, u64 capacity)
 +{
 +   struct max_capacity_info *__max_cap;
 +   unsigned long __capacity;
 +
 +   __max_cap = (_cpu(max_cap, cpu));
 +   if (!__max_cap) {
 +   pr_err("no max_capacity_info structure for cpu %d\n", cpu);
 +   return;
 +   }
 +
 +   /* Normalize the capacity */
 +   __capacity = (capacity * arch_scale_cpu_capacity(cpu)) >>
 +   
 SCHED_CAPACITY_SHIFT;
 +   pr_debug("updating cpu%d capped capacity from %lu to %lu\n", cpu, 
 __max_cap->cap_capacity, __capacity);
 +
 +   __max_cap->cap_capacity = __capacity;
 +}
 +
 +void populate_max_capacity_info(void)
 +{

Re: [Patch v3 2/7] sched: Add infrastructure to store and update instantaneous thermal pressure

2019-10-17 Thread Vincent Guittot
Hi Thara,

On Wed, 16 Oct 2019 at 23:22, Thara Gopinath  wrote:
>
> Hi Vincent,
>
> Thanks for the review
> On 10/14/2019 11:50 AM, Vincent Guittot wrote:
> > Hi Thara,
> >
> > On Mon, 14 Oct 2019 at 02:58, Thara Gopinath  
> > wrote:
> >>
> >> Add thermal.c and thermal.h files that provides interface
> >> APIs to initialize, update/average, track, accumulate and decay
> >> thermal pressure per cpu basis. A per cpu structure max_capacity_info is
> >> introduced to keep track of instantaneous per cpu thermal pressure.
> >> Thermal pressure is the delta between max_capacity and cap_capacity.
> >> API update_periodic_maxcap is called for periodic accumulate and decay
> >> of the thermal pressure. It is to to be called from a periodic tick
> >> function. This API calculates the delta between max_capacity and
> >> cap_capacity and passes on the delta to update_thermal_avg to do the
> >> necessary accumulate, decay and average. API update_maxcap_capacity is for
> >> the system to update the thermal pressure by updating cap_capacity.
> >> Considering, update_periodic_maxcap reads cap_capacity and
> >> update_maxcap_capacity writes into cap_capacity, one can argue for
> >> some sort of locking mechanism to avoid a stale value.
> >> But considering update_periodic_maxcap can be called from a system
> >> critical path like scheduler tick function, a locking mechanism is not
> >> ideal. This means that it is possible the value used to
> >> calculate average thermal pressure for a cpu can be stale for upto 1
> >> tick period.
> >>
> >> Signed-off-by: Thara Gopinath 
> >> ---
> >>  include/linux/sched.h  | 14 +++
> >>  kernel/sched/Makefile  |  2 +-
> >>  kernel/sched/thermal.c | 66 
> >> ++
> >>  kernel/sched/thermal.h | 13 ++
> >>  4 files changed, 94 insertions(+), 1 deletion(-)
> >>  create mode 100644 kernel/sched/thermal.c
> >>  create mode 100644 kernel/sched/thermal.h
> >>
> >> diff --git a/include/linux/sched.h b/include/linux/sched.h
> >> index 2c2e56b..875ce2b 100644
> >> --- a/include/linux/sched.h
> >> +++ b/include/linux/sched.h
> >> @@ -1983,6 +1983,20 @@ static inline void rseq_syscall(struct pt_regs 
> >> *regs)
> >>
> >>  #endif
> >>
> >> +#ifdef CONFIG_SMP
> >> +void update_maxcap_capacity(int cpu, u64 capacity);
> >> +
> >> +void populate_max_capacity_info(void);
> >> +#else
> >> +static inline void update_maxcap_capacity(int cpu, u64 capacity)
> >> +{
> >> +}
> >> +
> >> +static inline void populate_max_capacity_info(void)
> >> +{
> >> +}
> >> +#endif
> >> +
> >>  const struct sched_avg *sched_trace_cfs_rq_avg(struct cfs_rq *cfs_rq);
> >>  char *sched_trace_cfs_rq_path(struct cfs_rq *cfs_rq, char *str, int len);
> >>  int sched_trace_cfs_rq_cpu(struct cfs_rq *cfs_rq);
> >> diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
> >> index 21fb5a5..4d3b820 100644
> >> --- a/kernel/sched/Makefile
> >> +++ b/kernel/sched/Makefile
> >> @@ -20,7 +20,7 @@ obj-y += core.o loadavg.o clock.o cputime.o
> >>  obj-y += idle.o fair.o rt.o deadline.o
> >>  obj-y += wait.o wait_bit.o swait.o completion.o
> >>
> >> -obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o
> >> +obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o 
> >> thermal.o
> >>  obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
> >>  obj-$(CONFIG_SCHEDSTATS) += stats.o
> >>  obj-$(CONFIG_SCHED_DEBUG) += debug.o
> >> diff --git a/kernel/sched/thermal.c b/kernel/sched/thermal.c
> >> new file mode 100644
> >> index 000..5f0b2d4
> >> --- /dev/null
> >> +++ b/kernel/sched/thermal.c
> >> @@ -0,0 +1,66 @@
> >> +// SPDX-License-Identifier: GPL-2.0
> >> +/*
> >> + * Sceduler Thermal Interactions
> >> + *
> >> + *  Copyright (C) 2018 Linaro, Inc., Thara Gopinath 
> >> 
> >> + */
> >> +
> >> +#include 
> >> +#include "sched.h"
> >> +#include "pelt.h"
> >> +#include "thermal.h"
> >> +
> >> +struct max_capacity_info {
> >> +   unsigned long max_capacity;
> >> +   unsigned long cap_capacity;
> >> +};
> >> +
> >> +static DEFINE_PER_CPU(struct max_capacity_info, max_cap);
> >> +
> >> +void update_maxcap_capacity(int cpu, u64 capacity)
> >> +{
> >> +   struct max_capacity_info *__max_cap;
> >> +   unsigned long __capacity;
> >> +
> >> +   __max_cap = (_cpu(max_cap, cpu));
> >> +   if (!__max_cap) {
> >> +   pr_err("no max_capacity_info structure for cpu %d\n", cpu);
> >> +   return;
> >> +   }
> >> +
> >> +   /* Normalize the capacity */
> >> +   __capacity = (capacity * arch_scale_cpu_capacity(cpu)) >>
> >> +   
> >> SCHED_CAPACITY_SHIFT;
> >> +   pr_debug("updating cpu%d capped capacity from %lu to %lu\n", cpu, 
> >> __max_cap->cap_capacity, __capacity);
> >> +
> >> +   __max_cap->cap_capacity = __capacity;
> >> +}
> >> +
> >> +void populate_max_capacity_info(void)
> >> +{
> >> +   struct max_capacity_info *__max_cap;
> >> +   

Re: [Patch v3 2/7] sched: Add infrastructure to store and update instantaneous thermal pressure

2019-10-16 Thread Thara Gopinath
Hi Vincent,

Thanks for the review
On 10/14/2019 11:50 AM, Vincent Guittot wrote:
> Hi Thara,
> 
> On Mon, 14 Oct 2019 at 02:58, Thara Gopinath  
> wrote:
>>
>> Add thermal.c and thermal.h files that provides interface
>> APIs to initialize, update/average, track, accumulate and decay
>> thermal pressure per cpu basis. A per cpu structure max_capacity_info is
>> introduced to keep track of instantaneous per cpu thermal pressure.
>> Thermal pressure is the delta between max_capacity and cap_capacity.
>> API update_periodic_maxcap is called for periodic accumulate and decay
>> of the thermal pressure. It is to to be called from a periodic tick
>> function. This API calculates the delta between max_capacity and
>> cap_capacity and passes on the delta to update_thermal_avg to do the
>> necessary accumulate, decay and average. API update_maxcap_capacity is for
>> the system to update the thermal pressure by updating cap_capacity.
>> Considering, update_periodic_maxcap reads cap_capacity and
>> update_maxcap_capacity writes into cap_capacity, one can argue for
>> some sort of locking mechanism to avoid a stale value.
>> But considering update_periodic_maxcap can be called from a system
>> critical path like scheduler tick function, a locking mechanism is not
>> ideal. This means that it is possible the value used to
>> calculate average thermal pressure for a cpu can be stale for upto 1
>> tick period.
>>
>> Signed-off-by: Thara Gopinath 
>> ---
>>  include/linux/sched.h  | 14 +++
>>  kernel/sched/Makefile  |  2 +-
>>  kernel/sched/thermal.c | 66 
>> ++
>>  kernel/sched/thermal.h | 13 ++
>>  4 files changed, 94 insertions(+), 1 deletion(-)
>>  create mode 100644 kernel/sched/thermal.c
>>  create mode 100644 kernel/sched/thermal.h
>>
>> diff --git a/include/linux/sched.h b/include/linux/sched.h
>> index 2c2e56b..875ce2b 100644
>> --- a/include/linux/sched.h
>> +++ b/include/linux/sched.h
>> @@ -1983,6 +1983,20 @@ static inline void rseq_syscall(struct pt_regs *regs)
>>
>>  #endif
>>
>> +#ifdef CONFIG_SMP
>> +void update_maxcap_capacity(int cpu, u64 capacity);
>> +
>> +void populate_max_capacity_info(void);
>> +#else
>> +static inline void update_maxcap_capacity(int cpu, u64 capacity)
>> +{
>> +}
>> +
>> +static inline void populate_max_capacity_info(void)
>> +{
>> +}
>> +#endif
>> +
>>  const struct sched_avg *sched_trace_cfs_rq_avg(struct cfs_rq *cfs_rq);
>>  char *sched_trace_cfs_rq_path(struct cfs_rq *cfs_rq, char *str, int len);
>>  int sched_trace_cfs_rq_cpu(struct cfs_rq *cfs_rq);
>> diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
>> index 21fb5a5..4d3b820 100644
>> --- a/kernel/sched/Makefile
>> +++ b/kernel/sched/Makefile
>> @@ -20,7 +20,7 @@ obj-y += core.o loadavg.o clock.o cputime.o
>>  obj-y += idle.o fair.o rt.o deadline.o
>>  obj-y += wait.o wait_bit.o swait.o completion.o
>>
>> -obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o
>> +obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o 
>> thermal.o
>>  obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
>>  obj-$(CONFIG_SCHEDSTATS) += stats.o
>>  obj-$(CONFIG_SCHED_DEBUG) += debug.o
>> diff --git a/kernel/sched/thermal.c b/kernel/sched/thermal.c
>> new file mode 100644
>> index 000..5f0b2d4
>> --- /dev/null
>> +++ b/kernel/sched/thermal.c
>> @@ -0,0 +1,66 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Sceduler Thermal Interactions
>> + *
>> + *  Copyright (C) 2018 Linaro, Inc., Thara Gopinath 
>> 
>> + */
>> +
>> +#include 
>> +#include "sched.h"
>> +#include "pelt.h"
>> +#include "thermal.h"
>> +
>> +struct max_capacity_info {
>> +   unsigned long max_capacity;
>> +   unsigned long cap_capacity;
>> +};
>> +
>> +static DEFINE_PER_CPU(struct max_capacity_info, max_cap);
>> +
>> +void update_maxcap_capacity(int cpu, u64 capacity)
>> +{
>> +   struct max_capacity_info *__max_cap;
>> +   unsigned long __capacity;
>> +
>> +   __max_cap = (_cpu(max_cap, cpu));
>> +   if (!__max_cap) {
>> +   pr_err("no max_capacity_info structure for cpu %d\n", cpu);
>> +   return;
>> +   }
>> +
>> +   /* Normalize the capacity */
>> +   __capacity = (capacity * arch_scale_cpu_capacity(cpu)) >>
>> +   SCHED_CAPACITY_SHIFT;
>> +   pr_debug("updating cpu%d capped capacity from %lu to %lu\n", cpu, 
>> __max_cap->cap_capacity, __capacity);
>> +
>> +   __max_cap->cap_capacity = __capacity;
>> +}
>> +
>> +void populate_max_capacity_info(void)
>> +{
>> +   struct max_capacity_info *__max_cap;
>> +   u64 capacity;
>> +   int cpu;
>> +
>> +   for_each_possible_cpu(cpu) {
>> +   __max_cap = (_cpu(max_cap, cpu));
>> +   if (!__max_cap)
>> +   continue;
>> +   capacity = arch_scale_cpu_capacity(cpu);
>> +   __max_cap->max_capacity = capacity;
>> +

Re: [Patch v3 2/7] sched: Add infrastructure to store and update instantaneous thermal pressure

2019-10-14 Thread Vincent Guittot
Hi Thara,

On Mon, 14 Oct 2019 at 02:58, Thara Gopinath  wrote:
>
> Add thermal.c and thermal.h files that provides interface
> APIs to initialize, update/average, track, accumulate and decay
> thermal pressure per cpu basis. A per cpu structure max_capacity_info is
> introduced to keep track of instantaneous per cpu thermal pressure.
> Thermal pressure is the delta between max_capacity and cap_capacity.
> API update_periodic_maxcap is called for periodic accumulate and decay
> of the thermal pressure. It is to to be called from a periodic tick
> function. This API calculates the delta between max_capacity and
> cap_capacity and passes on the delta to update_thermal_avg to do the
> necessary accumulate, decay and average. API update_maxcap_capacity is for
> the system to update the thermal pressure by updating cap_capacity.
> Considering, update_periodic_maxcap reads cap_capacity and
> update_maxcap_capacity writes into cap_capacity, one can argue for
> some sort of locking mechanism to avoid a stale value.
> But considering update_periodic_maxcap can be called from a system
> critical path like scheduler tick function, a locking mechanism is not
> ideal. This means that it is possible the value used to
> calculate average thermal pressure for a cpu can be stale for upto 1
> tick period.
>
> Signed-off-by: Thara Gopinath 
> ---
>  include/linux/sched.h  | 14 +++
>  kernel/sched/Makefile  |  2 +-
>  kernel/sched/thermal.c | 66 
> ++
>  kernel/sched/thermal.h | 13 ++
>  4 files changed, 94 insertions(+), 1 deletion(-)
>  create mode 100644 kernel/sched/thermal.c
>  create mode 100644 kernel/sched/thermal.h
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 2c2e56b..875ce2b 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1983,6 +1983,20 @@ static inline void rseq_syscall(struct pt_regs *regs)
>
>  #endif
>
> +#ifdef CONFIG_SMP
> +void update_maxcap_capacity(int cpu, u64 capacity);
> +
> +void populate_max_capacity_info(void);
> +#else
> +static inline void update_maxcap_capacity(int cpu, u64 capacity)
> +{
> +}
> +
> +static inline void populate_max_capacity_info(void)
> +{
> +}
> +#endif
> +
>  const struct sched_avg *sched_trace_cfs_rq_avg(struct cfs_rq *cfs_rq);
>  char *sched_trace_cfs_rq_path(struct cfs_rq *cfs_rq, char *str, int len);
>  int sched_trace_cfs_rq_cpu(struct cfs_rq *cfs_rq);
> diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
> index 21fb5a5..4d3b820 100644
> --- a/kernel/sched/Makefile
> +++ b/kernel/sched/Makefile
> @@ -20,7 +20,7 @@ obj-y += core.o loadavg.o clock.o cputime.o
>  obj-y += idle.o fair.o rt.o deadline.o
>  obj-y += wait.o wait_bit.o swait.o completion.o
>
> -obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o
> +obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o 
> thermal.o
>  obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
>  obj-$(CONFIG_SCHEDSTATS) += stats.o
>  obj-$(CONFIG_SCHED_DEBUG) += debug.o
> diff --git a/kernel/sched/thermal.c b/kernel/sched/thermal.c
> new file mode 100644
> index 000..5f0b2d4
> --- /dev/null
> +++ b/kernel/sched/thermal.c
> @@ -0,0 +1,66 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Sceduler Thermal Interactions
> + *
> + *  Copyright (C) 2018 Linaro, Inc., Thara Gopinath 
> 
> + */
> +
> +#include 
> +#include "sched.h"
> +#include "pelt.h"
> +#include "thermal.h"
> +
> +struct max_capacity_info {
> +   unsigned long max_capacity;
> +   unsigned long cap_capacity;
> +};
> +
> +static DEFINE_PER_CPU(struct max_capacity_info, max_cap);
> +
> +void update_maxcap_capacity(int cpu, u64 capacity)
> +{
> +   struct max_capacity_info *__max_cap;
> +   unsigned long __capacity;
> +
> +   __max_cap = (_cpu(max_cap, cpu));
> +   if (!__max_cap) {
> +   pr_err("no max_capacity_info structure for cpu %d\n", cpu);
> +   return;
> +   }
> +
> +   /* Normalize the capacity */
> +   __capacity = (capacity * arch_scale_cpu_capacity(cpu)) >>
> +   SCHED_CAPACITY_SHIFT;
> +   pr_debug("updating cpu%d capped capacity from %lu to %lu\n", cpu, 
> __max_cap->cap_capacity, __capacity);
> +
> +   __max_cap->cap_capacity = __capacity;
> +}
> +
> +void populate_max_capacity_info(void)
> +{
> +   struct max_capacity_info *__max_cap;
> +   u64 capacity;
> +   int cpu;
> +
> +   for_each_possible_cpu(cpu) {
> +   __max_cap = (_cpu(max_cap, cpu));
> +   if (!__max_cap)
> +   continue;
> +   capacity = arch_scale_cpu_capacity(cpu);
> +   __max_cap->max_capacity = capacity;
> +   __max_cap->cap_capacity = capacity;
> +   pr_debug("cpu %d max capacity set to %ld\n", cpu, 
> __max_cap->max_capacity);
> +   }
> +}

everything above seems to be there for the cpu cooling device and

[Patch v3 2/7] sched: Add infrastructure to store and update instantaneous thermal pressure

2019-10-13 Thread Thara Gopinath
Add thermal.c and thermal.h files that provide interface
APIs to initialize, update/average, track, accumulate and decay
thermal pressure on a per cpu basis. A per cpu structure max_capacity_info is
introduced to keep track of instantaneous per cpu thermal pressure.
Thermal pressure is the delta between max_capacity and cap_capacity.
API update_periodic_maxcap is called for periodic accumulate and decay
of the thermal pressure. It is to be called from a periodic tick
function. This API calculates the delta between max_capacity and
cap_capacity and passes on the delta to update_thermal_avg to do the
necessary accumulate, decay and average. API update_maxcap_capacity is for
the system to update the thermal pressure by updating cap_capacity.
Considering, update_periodic_maxcap reads cap_capacity and
update_maxcap_capacity writes into cap_capacity, one can argue for
some sort of locking mechanism to avoid a stale value.
But considering update_periodic_maxcap can be called from a system
critical path like scheduler tick function, a locking mechanism is not
ideal. This means that it is possible the value used to
calculate average thermal pressure for a cpu can be stale for up to 1
tick period.

Signed-off-by: Thara Gopinath 
---
 include/linux/sched.h  | 14 +++
 kernel/sched/Makefile  |  2 +-
 kernel/sched/thermal.c | 66 ++
 kernel/sched/thermal.h | 13 ++
 4 files changed, 94 insertions(+), 1 deletion(-)
 create mode 100644 kernel/sched/thermal.c
 create mode 100644 kernel/sched/thermal.h

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2c2e56b..875ce2b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1983,6 +1983,20 @@ static inline void rseq_syscall(struct pt_regs *regs)
 
 #endif
 
+#ifdef CONFIG_SMP
+void update_maxcap_capacity(int cpu, u64 capacity);
+
+void populate_max_capacity_info(void);
+#else
+static inline void update_maxcap_capacity(int cpu, u64 capacity)
+{
+}
+
+static inline void populate_max_capacity_info(void)
+{
+}
+#endif
+
 const struct sched_avg *sched_trace_cfs_rq_avg(struct cfs_rq *cfs_rq);
 char *sched_trace_cfs_rq_path(struct cfs_rq *cfs_rq, char *str, int len);
 int sched_trace_cfs_rq_cpu(struct cfs_rq *cfs_rq);
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 21fb5a5..4d3b820 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -20,7 +20,7 @@ obj-y += core.o loadavg.o clock.o cputime.o
 obj-y += idle.o fair.o rt.o deadline.o
 obj-y += wait.o wait_bit.o swait.o completion.o
 
-obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o
+obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o thermal.o
 obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
 obj-$(CONFIG_SCHEDSTATS) += stats.o
 obj-$(CONFIG_SCHED_DEBUG) += debug.o
diff --git a/kernel/sched/thermal.c b/kernel/sched/thermal.c
new file mode 100644
index 000..5f0b2d4
--- /dev/null
+++ b/kernel/sched/thermal.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Scheduler Thermal Interactions
+ *
+ *  Copyright (C) 2018 Linaro, Inc., Thara Gopinath 
+ */
+
+#include 
+#include "sched.h"
+#include "pelt.h"
+#include "thermal.h"
+
+struct max_capacity_info {
+   unsigned long max_capacity;
+   unsigned long cap_capacity;
+};
+
+static DEFINE_PER_CPU(struct max_capacity_info, max_cap);
+
+void update_maxcap_capacity(int cpu, u64 capacity)
+{
+   struct max_capacity_info *__max_cap;
+   unsigned long __capacity;
+
+   __max_cap = (&per_cpu(max_cap, cpu));
+   if (!__max_cap) {
+   pr_err("no max_capacity_info structure for cpu %d\n", cpu);
+   return;
+   }
+
+   /* Normalize the capacity */
+   __capacity = (capacity * arch_scale_cpu_capacity(cpu)) >>
+   SCHED_CAPACITY_SHIFT;
+   pr_debug("updating cpu%d capped capacity from %lu to %lu\n", cpu, __max_cap->cap_capacity, __capacity);
+
+   __max_cap->cap_capacity = __capacity;
+}
+
+void populate_max_capacity_info(void)
+{
+   struct max_capacity_info *__max_cap;
+   u64 capacity;
+   int cpu;
+
+   for_each_possible_cpu(cpu) {
+   __max_cap = (&per_cpu(max_cap, cpu));
+   if (!__max_cap)
+   continue;
+   capacity = arch_scale_cpu_capacity(cpu);
+   __max_cap->max_capacity = capacity;
+   __max_cap->cap_capacity = capacity;
+   pr_debug("cpu %d max capacity set to %ld\n", cpu, __max_cap->max_capacity);
+   }
+}
+
+void update_periodic_maxcap(struct rq *rq)
+{
+   struct max_capacity_info *__max_cap = (&per_cpu(max_cap, cpu_of(rq)));
+   unsigned long delta;
+
+   if (!__max_cap)
+   return;
+
+   delta = __max_cap->max_capacity - __max_cap->cap_capacity;
+   update_thermal_avg(rq_clock_task(rq), rq, delta);
+}
diff --git a/kernel/sched/thermal.h b/kernel/sched/thermal.h