Re: [Patch v3 2/7] sched: Add infrastructure to store and update instantaneous thermal pressure
Hi Thara, On Thu, 17 Oct 2019 at 18:40, Thara Gopinath wrote: > > On 10/17/2019 04:44 AM, Vincent Guittot wrote: > > Hi Thara, > > > > On Wed, 16 Oct 2019 at 23:22, Thara Gopinath > > wrote: > >> > >> Hi Vincent, > >> > >> Thanks for the review > >> On 10/14/2019 11:50 AM, Vincent Guittot wrote: > >>> Hi Thara, > >>> > >>> On Mon, 14 Oct 2019 at 02:58, Thara Gopinath > >>> wrote: > > Add thermal.c and thermal.h files that provides interface > APIs to initialize, update/average, track, accumulate and decay > thermal pressure per cpu basis. A per cpu structure max_capacity_info is > introduced to keep track of instantaneous per cpu thermal pressure. > Thermal pressure is the delta between max_capacity and cap_capacity. > API update_periodic_maxcap is called for periodic accumulate and decay > of the thermal pressure. It is to to be called from a periodic tick > function. This API calculates the delta between max_capacity and > cap_capacity and passes on the delta to update_thermal_avg to do the > necessary accumulate, decay and average. API update_maxcap_capacity is > for > the system to update the thermal pressure by updating cap_capacity. > Considering, update_periodic_maxcap reads cap_capacity and > update_maxcap_capacity writes into cap_capacity, one can argue for > some sort of locking mechanism to avoid a stale value. > But considering update_periodic_maxcap can be called from a system > critical path like scheduler tick function, a locking mechanism is not > ideal. This means that it is possible the value used to > calculate average thermal pressure for a cpu can be stale for upto 1 > tick period. 
> > Signed-off-by: Thara Gopinath > --- > include/linux/sched.h | 14 +++ > kernel/sched/Makefile | 2 +- > kernel/sched/thermal.c | 66 > ++ > kernel/sched/thermal.h | 13 ++ > 4 files changed, 94 insertions(+), 1 deletion(-) > create mode 100644 kernel/sched/thermal.c > create mode 100644 kernel/sched/thermal.h > > diff --git a/include/linux/sched.h b/include/linux/sched.h > index 2c2e56b..875ce2b 100644 > --- a/include/linux/sched.h > +++ b/include/linux/sched.h > @@ -1983,6 +1983,20 @@ static inline void rseq_syscall(struct pt_regs > *regs) > > #endif > > +#ifdef CONFIG_SMP > +void update_maxcap_capacity(int cpu, u64 capacity); > + > +void populate_max_capacity_info(void); > +#else > +static inline void update_maxcap_capacity(int cpu, u64 capacity) > +{ > +} > + > +static inline void populate_max_capacity_info(void) > +{ > +} > +#endif > + > const struct sched_avg *sched_trace_cfs_rq_avg(struct cfs_rq *cfs_rq); > char *sched_trace_cfs_rq_path(struct cfs_rq *cfs_rq, char *str, int > len); > int sched_trace_cfs_rq_cpu(struct cfs_rq *cfs_rq); > diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile > index 21fb5a5..4d3b820 100644 > --- a/kernel/sched/Makefile > +++ b/kernel/sched/Makefile > @@ -20,7 +20,7 @@ obj-y += core.o loadavg.o clock.o cputime.o > obj-y += idle.o fair.o rt.o deadline.o > obj-y += wait.o wait_bit.o swait.o completion.o > > -obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o > pelt.o > +obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o > pelt.o thermal.o > obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o > obj-$(CONFIG_SCHEDSTATS) += stats.o > obj-$(CONFIG_SCHED_DEBUG) += debug.o > diff --git a/kernel/sched/thermal.c b/kernel/sched/thermal.c > new file mode 100644 > index 000..5f0b2d4 > --- /dev/null > +++ b/kernel/sched/thermal.c > @@ -0,0 +1,66 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Sceduler Thermal Interactions > + * > + * Copyright (C) 2018 Linaro, Inc., Thara Gopinath > > + */ > + > +#include > 
+#include "sched.h" > +#include "pelt.h" > +#include "thermal.h" > + > +struct max_capacity_info { > + unsigned long max_capacity; > + unsigned long cap_capacity; > +}; > + > +static DEFINE_PER_CPU(struct max_capacity_info, max_cap); > + > +void update_maxcap_capacity(int cpu, u64 capacity) > +{ > + struct max_capacity_info *__max_cap; > + unsigned long __capacity; > + > + __max_cap = (_cpu(max_cap, cpu)); > + if (!__max_cap) { > + pr_err("no max_capacity_info structure for cpu %d\n", > cpu); > + return; > + } > + > + /* Normalize the capacity */ > + __capacity = (capacity *
Re: [Patch v3 2/7] sched: Add infrastructure to store and update instantaneous thermal pressure
On 10/17/2019 04:44 AM, Vincent Guittot wrote: > Hi Thara, > > On Wed, 16 Oct 2019 at 23:22, Thara Gopinath > wrote: >> >> Hi Vincent, >> >> Thanks for the review >> On 10/14/2019 11:50 AM, Vincent Guittot wrote: >>> Hi Thara, >>> >>> On Mon, 14 Oct 2019 at 02:58, Thara Gopinath >>> wrote: Add thermal.c and thermal.h files that provides interface APIs to initialize, update/average, track, accumulate and decay thermal pressure per cpu basis. A per cpu structure max_capacity_info is introduced to keep track of instantaneous per cpu thermal pressure. Thermal pressure is the delta between max_capacity and cap_capacity. API update_periodic_maxcap is called for periodic accumulate and decay of the thermal pressure. It is to to be called from a periodic tick function. This API calculates the delta between max_capacity and cap_capacity and passes on the delta to update_thermal_avg to do the necessary accumulate, decay and average. API update_maxcap_capacity is for the system to update the thermal pressure by updating cap_capacity. Considering, update_periodic_maxcap reads cap_capacity and update_maxcap_capacity writes into cap_capacity, one can argue for some sort of locking mechanism to avoid a stale value. But considering update_periodic_maxcap can be called from a system critical path like scheduler tick function, a locking mechanism is not ideal. This means that it is possible the value used to calculate average thermal pressure for a cpu can be stale for upto 1 tick period. 
Signed-off-by: Thara Gopinath --- include/linux/sched.h | 14 +++ kernel/sched/Makefile | 2 +- kernel/sched/thermal.c | 66 ++ kernel/sched/thermal.h | 13 ++ 4 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 kernel/sched/thermal.c create mode 100644 kernel/sched/thermal.h diff --git a/include/linux/sched.h b/include/linux/sched.h index 2c2e56b..875ce2b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1983,6 +1983,20 @@ static inline void rseq_syscall(struct pt_regs *regs) #endif +#ifdef CONFIG_SMP +void update_maxcap_capacity(int cpu, u64 capacity); + +void populate_max_capacity_info(void); +#else +static inline void update_maxcap_capacity(int cpu, u64 capacity) +{ +} + +static inline void populate_max_capacity_info(void) +{ +} +#endif + const struct sched_avg *sched_trace_cfs_rq_avg(struct cfs_rq *cfs_rq); char *sched_trace_cfs_rq_path(struct cfs_rq *cfs_rq, char *str, int len); int sched_trace_cfs_rq_cpu(struct cfs_rq *cfs_rq); diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 21fb5a5..4d3b820 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -20,7 +20,7 @@ obj-y += core.o loadavg.o clock.o cputime.o obj-y += idle.o fair.o rt.o deadline.o obj-y += wait.o wait_bit.o swait.o completion.o -obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o +obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o thermal.o obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o obj-$(CONFIG_SCHEDSTATS) += stats.o obj-$(CONFIG_SCHED_DEBUG) += debug.o diff --git a/kernel/sched/thermal.c b/kernel/sched/thermal.c new file mode 100644 index 000..5f0b2d4 --- /dev/null +++ b/kernel/sched/thermal.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Sceduler Thermal Interactions + * + * Copyright (C) 2018 Linaro, Inc., Thara Gopinath + */ + +#include +#include "sched.h" +#include "pelt.h" +#include "thermal.h" + +struct max_capacity_info { + unsigned long max_capacity; + unsigned long 
cap_capacity; +}; + +static DEFINE_PER_CPU(struct max_capacity_info, max_cap); + +void update_maxcap_capacity(int cpu, u64 capacity) +{ + struct max_capacity_info *__max_cap; + unsigned long __capacity; + + __max_cap = (_cpu(max_cap, cpu)); + if (!__max_cap) { + pr_err("no max_capacity_info structure for cpu %d\n", cpu); + return; + } + + /* Normalize the capacity */ + __capacity = (capacity * arch_scale_cpu_capacity(cpu)) >> + SCHED_CAPACITY_SHIFT; + pr_debug("updating cpu%d capped capacity from %lu to %lu\n", cpu, __max_cap->cap_capacity, __capacity); + + __max_cap->cap_capacity = __capacity; +} + +void populate_max_capacity_info(void) +{
Re: [Patch v3 2/7] sched: Add infrastructure to store and update instantaneous thermal pressure
Hi Thara, On Wed, 16 Oct 2019 at 23:22, Thara Gopinath wrote: > > Hi Vincent, > > Thanks for the review > On 10/14/2019 11:50 AM, Vincent Guittot wrote: > > Hi Thara, > > > > On Mon, 14 Oct 2019 at 02:58, Thara Gopinath > > wrote: > >> > >> Add thermal.c and thermal.h files that provides interface > >> APIs to initialize, update/average, track, accumulate and decay > >> thermal pressure per cpu basis. A per cpu structure max_capacity_info is > >> introduced to keep track of instantaneous per cpu thermal pressure. > >> Thermal pressure is the delta between max_capacity and cap_capacity. > >> API update_periodic_maxcap is called for periodic accumulate and decay > >> of the thermal pressure. It is to to be called from a periodic tick > >> function. This API calculates the delta between max_capacity and > >> cap_capacity and passes on the delta to update_thermal_avg to do the > >> necessary accumulate, decay and average. API update_maxcap_capacity is for > >> the system to update the thermal pressure by updating cap_capacity. > >> Considering, update_periodic_maxcap reads cap_capacity and > >> update_maxcap_capacity writes into cap_capacity, one can argue for > >> some sort of locking mechanism to avoid a stale value. > >> But considering update_periodic_maxcap can be called from a system > >> critical path like scheduler tick function, a locking mechanism is not > >> ideal. This means that it is possible the value used to > >> calculate average thermal pressure for a cpu can be stale for upto 1 > >> tick period. 
> >> > >> Signed-off-by: Thara Gopinath > >> --- > >> include/linux/sched.h | 14 +++ > >> kernel/sched/Makefile | 2 +- > >> kernel/sched/thermal.c | 66 > >> ++ > >> kernel/sched/thermal.h | 13 ++ > >> 4 files changed, 94 insertions(+), 1 deletion(-) > >> create mode 100644 kernel/sched/thermal.c > >> create mode 100644 kernel/sched/thermal.h > >> > >> diff --git a/include/linux/sched.h b/include/linux/sched.h > >> index 2c2e56b..875ce2b 100644 > >> --- a/include/linux/sched.h > >> +++ b/include/linux/sched.h > >> @@ -1983,6 +1983,20 @@ static inline void rseq_syscall(struct pt_regs > >> *regs) > >> > >> #endif > >> > >> +#ifdef CONFIG_SMP > >> +void update_maxcap_capacity(int cpu, u64 capacity); > >> + > >> +void populate_max_capacity_info(void); > >> +#else > >> +static inline void update_maxcap_capacity(int cpu, u64 capacity) > >> +{ > >> +} > >> + > >> +static inline void populate_max_capacity_info(void) > >> +{ > >> +} > >> +#endif > >> + > >> const struct sched_avg *sched_trace_cfs_rq_avg(struct cfs_rq *cfs_rq); > >> char *sched_trace_cfs_rq_path(struct cfs_rq *cfs_rq, char *str, int len); > >> int sched_trace_cfs_rq_cpu(struct cfs_rq *cfs_rq); > >> diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile > >> index 21fb5a5..4d3b820 100644 > >> --- a/kernel/sched/Makefile > >> +++ b/kernel/sched/Makefile > >> @@ -20,7 +20,7 @@ obj-y += core.o loadavg.o clock.o cputime.o > >> obj-y += idle.o fair.o rt.o deadline.o > >> obj-y += wait.o wait_bit.o swait.o completion.o > >> > >> -obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o > >> +obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o > >> thermal.o > >> obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o > >> obj-$(CONFIG_SCHEDSTATS) += stats.o > >> obj-$(CONFIG_SCHED_DEBUG) += debug.o > >> diff --git a/kernel/sched/thermal.c b/kernel/sched/thermal.c > >> new file mode 100644 > >> index 000..5f0b2d4 > >> --- /dev/null > >> +++ b/kernel/sched/thermal.c > >> @@ -0,0 +1,66 
@@ > >> +// SPDX-License-Identifier: GPL-2.0 > >> +/* > >> + * Sceduler Thermal Interactions > >> + * > >> + * Copyright (C) 2018 Linaro, Inc., Thara Gopinath > >> > >> + */ > >> + > >> +#include > >> +#include "sched.h" > >> +#include "pelt.h" > >> +#include "thermal.h" > >> + > >> +struct max_capacity_info { > >> + unsigned long max_capacity; > >> + unsigned long cap_capacity; > >> +}; > >> + > >> +static DEFINE_PER_CPU(struct max_capacity_info, max_cap); > >> + > >> +void update_maxcap_capacity(int cpu, u64 capacity) > >> +{ > >> + struct max_capacity_info *__max_cap; > >> + unsigned long __capacity; > >> + > >> + __max_cap = (_cpu(max_cap, cpu)); > >> + if (!__max_cap) { > >> + pr_err("no max_capacity_info structure for cpu %d\n", cpu); > >> + return; > >> + } > >> + > >> + /* Normalize the capacity */ > >> + __capacity = (capacity * arch_scale_cpu_capacity(cpu)) >> > >> + > >> SCHED_CAPACITY_SHIFT; > >> + pr_debug("updating cpu%d capped capacity from %lu to %lu\n", cpu, > >> __max_cap->cap_capacity, __capacity); > >> + > >> + __max_cap->cap_capacity = __capacity; > >> +} > >> + > >> +void populate_max_capacity_info(void) > >> +{ > >> + struct max_capacity_info *__max_cap; > >> +
Re: [Patch v3 2/7] sched: Add infrastructure to store and update instantaneous thermal pressure
Hi Vincent, Thanks for the review On 10/14/2019 11:50 AM, Vincent Guittot wrote: > Hi Thara, > > On Mon, 14 Oct 2019 at 02:58, Thara Gopinath > wrote: >> >> Add thermal.c and thermal.h files that provides interface >> APIs to initialize, update/average, track, accumulate and decay >> thermal pressure per cpu basis. A per cpu structure max_capacity_info is >> introduced to keep track of instantaneous per cpu thermal pressure. >> Thermal pressure is the delta between max_capacity and cap_capacity. >> API update_periodic_maxcap is called for periodic accumulate and decay >> of the thermal pressure. It is to to be called from a periodic tick >> function. This API calculates the delta between max_capacity and >> cap_capacity and passes on the delta to update_thermal_avg to do the >> necessary accumulate, decay and average. API update_maxcap_capacity is for >> the system to update the thermal pressure by updating cap_capacity. >> Considering, update_periodic_maxcap reads cap_capacity and >> update_maxcap_capacity writes into cap_capacity, one can argue for >> some sort of locking mechanism to avoid a stale value. >> But considering update_periodic_maxcap can be called from a system >> critical path like scheduler tick function, a locking mechanism is not >> ideal. This means that it is possible the value used to >> calculate average thermal pressure for a cpu can be stale for upto 1 >> tick period. 
>> >> Signed-off-by: Thara Gopinath >> --- >> include/linux/sched.h | 14 +++ >> kernel/sched/Makefile | 2 +- >> kernel/sched/thermal.c | 66 >> ++ >> kernel/sched/thermal.h | 13 ++ >> 4 files changed, 94 insertions(+), 1 deletion(-) >> create mode 100644 kernel/sched/thermal.c >> create mode 100644 kernel/sched/thermal.h >> >> diff --git a/include/linux/sched.h b/include/linux/sched.h >> index 2c2e56b..875ce2b 100644 >> --- a/include/linux/sched.h >> +++ b/include/linux/sched.h >> @@ -1983,6 +1983,20 @@ static inline void rseq_syscall(struct pt_regs *regs) >> >> #endif >> >> +#ifdef CONFIG_SMP >> +void update_maxcap_capacity(int cpu, u64 capacity); >> + >> +void populate_max_capacity_info(void); >> +#else >> +static inline void update_maxcap_capacity(int cpu, u64 capacity) >> +{ >> +} >> + >> +static inline void populate_max_capacity_info(void) >> +{ >> +} >> +#endif >> + >> const struct sched_avg *sched_trace_cfs_rq_avg(struct cfs_rq *cfs_rq); >> char *sched_trace_cfs_rq_path(struct cfs_rq *cfs_rq, char *str, int len); >> int sched_trace_cfs_rq_cpu(struct cfs_rq *cfs_rq); >> diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile >> index 21fb5a5..4d3b820 100644 >> --- a/kernel/sched/Makefile >> +++ b/kernel/sched/Makefile >> @@ -20,7 +20,7 @@ obj-y += core.o loadavg.o clock.o cputime.o >> obj-y += idle.o fair.o rt.o deadline.o >> obj-y += wait.o wait_bit.o swait.o completion.o >> >> -obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o >> +obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o >> thermal.o >> obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o >> obj-$(CONFIG_SCHEDSTATS) += stats.o >> obj-$(CONFIG_SCHED_DEBUG) += debug.o >> diff --git a/kernel/sched/thermal.c b/kernel/sched/thermal.c >> new file mode 100644 >> index 000..5f0b2d4 >> --- /dev/null >> +++ b/kernel/sched/thermal.c >> @@ -0,0 +1,66 @@ >> +// SPDX-License-Identifier: GPL-2.0 >> +/* >> + * Sceduler Thermal Interactions >> + * >> + * Copyright (C) 2018 
Linaro, Inc., Thara Gopinath >> >> + */ >> + >> +#include >> +#include "sched.h" >> +#include "pelt.h" >> +#include "thermal.h" >> + >> +struct max_capacity_info { >> + unsigned long max_capacity; >> + unsigned long cap_capacity; >> +}; >> + >> +static DEFINE_PER_CPU(struct max_capacity_info, max_cap); >> + >> +void update_maxcap_capacity(int cpu, u64 capacity) >> +{ >> + struct max_capacity_info *__max_cap; >> + unsigned long __capacity; >> + >> + __max_cap = (_cpu(max_cap, cpu)); >> + if (!__max_cap) { >> + pr_err("no max_capacity_info structure for cpu %d\n", cpu); >> + return; >> + } >> + >> + /* Normalize the capacity */ >> + __capacity = (capacity * arch_scale_cpu_capacity(cpu)) >> >> + SCHED_CAPACITY_SHIFT; >> + pr_debug("updating cpu%d capped capacity from %lu to %lu\n", cpu, >> __max_cap->cap_capacity, __capacity); >> + >> + __max_cap->cap_capacity = __capacity; >> +} >> + >> +void populate_max_capacity_info(void) >> +{ >> + struct max_capacity_info *__max_cap; >> + u64 capacity; >> + int cpu; >> + >> + for_each_possible_cpu(cpu) { >> + __max_cap = (_cpu(max_cap, cpu)); >> + if (!__max_cap) >> + continue; >> + capacity = arch_scale_cpu_capacity(cpu); >> + __max_cap->max_capacity = capacity; >> +
Re: [Patch v3 2/7] sched: Add infrastructure to store and update instantaneous thermal pressure
Hi Thara, On Mon, 14 Oct 2019 at 02:58, Thara Gopinath wrote: > > Add thermal.c and thermal.h files that provides interface > APIs to initialize, update/average, track, accumulate and decay > thermal pressure per cpu basis. A per cpu structure max_capacity_info is > introduced to keep track of instantaneous per cpu thermal pressure. > Thermal pressure is the delta between max_capacity and cap_capacity. > API update_periodic_maxcap is called for periodic accumulate and decay > of the thermal pressure. It is to to be called from a periodic tick > function. This API calculates the delta between max_capacity and > cap_capacity and passes on the delta to update_thermal_avg to do the > necessary accumulate, decay and average. API update_maxcap_capacity is for > the system to update the thermal pressure by updating cap_capacity. > Considering, update_periodic_maxcap reads cap_capacity and > update_maxcap_capacity writes into cap_capacity, one can argue for > some sort of locking mechanism to avoid a stale value. > But considering update_periodic_maxcap can be called from a system > critical path like scheduler tick function, a locking mechanism is not > ideal. This means that it is possible the value used to > calculate average thermal pressure for a cpu can be stale for upto 1 > tick period. 
> > Signed-off-by: Thara Gopinath > --- > include/linux/sched.h | 14 +++ > kernel/sched/Makefile | 2 +- > kernel/sched/thermal.c | 66 > ++ > kernel/sched/thermal.h | 13 ++ > 4 files changed, 94 insertions(+), 1 deletion(-) > create mode 100644 kernel/sched/thermal.c > create mode 100644 kernel/sched/thermal.h > > diff --git a/include/linux/sched.h b/include/linux/sched.h > index 2c2e56b..875ce2b 100644 > --- a/include/linux/sched.h > +++ b/include/linux/sched.h > @@ -1983,6 +1983,20 @@ static inline void rseq_syscall(struct pt_regs *regs) > > #endif > > +#ifdef CONFIG_SMP > +void update_maxcap_capacity(int cpu, u64 capacity); > + > +void populate_max_capacity_info(void); > +#else > +static inline void update_maxcap_capacity(int cpu, u64 capacity) > +{ > +} > + > +static inline void populate_max_capacity_info(void) > +{ > +} > +#endif > + > const struct sched_avg *sched_trace_cfs_rq_avg(struct cfs_rq *cfs_rq); > char *sched_trace_cfs_rq_path(struct cfs_rq *cfs_rq, char *str, int len); > int sched_trace_cfs_rq_cpu(struct cfs_rq *cfs_rq); > diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile > index 21fb5a5..4d3b820 100644 > --- a/kernel/sched/Makefile > +++ b/kernel/sched/Makefile > @@ -20,7 +20,7 @@ obj-y += core.o loadavg.o clock.o cputime.o > obj-y += idle.o fair.o rt.o deadline.o > obj-y += wait.o wait_bit.o swait.o completion.o > > -obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o > +obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o > thermal.o > obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o > obj-$(CONFIG_SCHEDSTATS) += stats.o > obj-$(CONFIG_SCHED_DEBUG) += debug.o > diff --git a/kernel/sched/thermal.c b/kernel/sched/thermal.c > new file mode 100644 > index 000..5f0b2d4 > --- /dev/null > +++ b/kernel/sched/thermal.c > @@ -0,0 +1,66 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Sceduler Thermal Interactions > + * > + * Copyright (C) 2018 Linaro, Inc., Thara Gopinath > > + */ > + > +#include > 
+#include "sched.h" > +#include "pelt.h" > +#include "thermal.h" > + > +struct max_capacity_info { > + unsigned long max_capacity; > + unsigned long cap_capacity; > +}; > + > +static DEFINE_PER_CPU(struct max_capacity_info, max_cap); > + > +void update_maxcap_capacity(int cpu, u64 capacity) > +{ > + struct max_capacity_info *__max_cap; > + unsigned long __capacity; > + > + __max_cap = (_cpu(max_cap, cpu)); > + if (!__max_cap) { > + pr_err("no max_capacity_info structure for cpu %d\n", cpu); > + return; > + } > + > + /* Normalize the capacity */ > + __capacity = (capacity * arch_scale_cpu_capacity(cpu)) >> > + SCHED_CAPACITY_SHIFT; > + pr_debug("updating cpu%d capped capacity from %lu to %lu\n", cpu, > __max_cap->cap_capacity, __capacity); > + > + __max_cap->cap_capacity = __capacity; > +} > + > +void populate_max_capacity_info(void) > +{ > + struct max_capacity_info *__max_cap; > + u64 capacity; > + int cpu; > + > + for_each_possible_cpu(cpu) { > + __max_cap = (_cpu(max_cap, cpu)); > + if (!__max_cap) > + continue; > + capacity = arch_scale_cpu_capacity(cpu); > + __max_cap->max_capacity = capacity; > + __max_cap->cap_capacity = capacity; > + pr_debug("cpu %d max capacity set to %ld\n", cpu, > __max_cap->max_capacity); > + } > +} everything above seems to be there for the cpu cooling device and
[Patch v3 2/7] sched: Add infrastructure to store and update instantaneous thermal pressure
Add thermal.c and thermal.h files that provide interface APIs to initialize, update/average, track, accumulate and decay thermal pressure on a per cpu basis. A per cpu structure max_capacity_info is introduced to keep track of instantaneous per cpu thermal pressure. Thermal pressure is the delta between max_capacity and cap_capacity. API update_periodic_maxcap is called for periodic accumulation and decay of the thermal pressure. It is to be called from a periodic tick function. This API calculates the delta between max_capacity and cap_capacity and passes on the delta to update_thermal_avg to do the necessary accumulate, decay and average. API update_maxcap_capacity is for the system to update the thermal pressure by updating cap_capacity. Considering that update_periodic_maxcap reads cap_capacity and update_maxcap_capacity writes into cap_capacity, one can argue for some sort of locking mechanism to avoid a stale value. But considering update_periodic_maxcap can be called from a system critical path like the scheduler tick function, a locking mechanism is not ideal. This means that it is possible the value used to calculate average thermal pressure for a cpu can be stale for up to 1 tick period. 
Signed-off-by: Thara Gopinath --- include/linux/sched.h | 14 +++ kernel/sched/Makefile | 2 +- kernel/sched/thermal.c | 66 ++ kernel/sched/thermal.h | 13 ++ 4 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 kernel/sched/thermal.c create mode 100644 kernel/sched/thermal.h diff --git a/include/linux/sched.h b/include/linux/sched.h index 2c2e56b..875ce2b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1983,6 +1983,20 @@ static inline void rseq_syscall(struct pt_regs *regs) #endif +#ifdef CONFIG_SMP +void update_maxcap_capacity(int cpu, u64 capacity); + +void populate_max_capacity_info(void); +#else +static inline void update_maxcap_capacity(int cpu, u64 capacity) +{ +} + +static inline void populate_max_capacity_info(void) +{ +} +#endif + const struct sched_avg *sched_trace_cfs_rq_avg(struct cfs_rq *cfs_rq); char *sched_trace_cfs_rq_path(struct cfs_rq *cfs_rq, char *str, int len); int sched_trace_cfs_rq_cpu(struct cfs_rq *cfs_rq); diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 21fb5a5..4d3b820 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -20,7 +20,7 @@ obj-y += core.o loadavg.o clock.o cputime.o obj-y += idle.o fair.o rt.o deadline.o obj-y += wait.o wait_bit.o swait.o completion.o -obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o +obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o thermal.o obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o obj-$(CONFIG_SCHEDSTATS) += stats.o obj-$(CONFIG_SCHED_DEBUG) += debug.o diff --git a/kernel/sched/thermal.c b/kernel/sched/thermal.c new file mode 100644 index 000..5f0b2d4 --- /dev/null +++ b/kernel/sched/thermal.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Sceduler Thermal Interactions + * + * Copyright (C) 2018 Linaro, Inc., Thara Gopinath + */ + +#include +#include "sched.h" +#include "pelt.h" +#include "thermal.h" + +struct max_capacity_info { + unsigned long max_capacity; + unsigned long 
cap_capacity; +}; + +static DEFINE_PER_CPU(struct max_capacity_info, max_cap); + +void update_maxcap_capacity(int cpu, u64 capacity) +{ + struct max_capacity_info *__max_cap; + unsigned long __capacity; + + __max_cap = (&per_cpu(max_cap, cpu)); + if (!__max_cap) { + pr_err("no max_capacity_info structure for cpu %d\n", cpu); + return; + } + + /* Normalize the capacity */ + __capacity = (capacity * arch_scale_cpu_capacity(cpu)) >> + SCHED_CAPACITY_SHIFT; + pr_debug("updating cpu%d capped capacity from %lu to %lu\n", cpu, __max_cap->cap_capacity, __capacity); + + __max_cap->cap_capacity = __capacity; +} + +void populate_max_capacity_info(void) +{ + struct max_capacity_info *__max_cap; + u64 capacity; + int cpu; + + for_each_possible_cpu(cpu) { + __max_cap = (&per_cpu(max_cap, cpu)); + if (!__max_cap) + continue; + capacity = arch_scale_cpu_capacity(cpu); + __max_cap->max_capacity = capacity; + __max_cap->cap_capacity = capacity; + pr_debug("cpu %d max capacity set to %ld\n", cpu, __max_cap->max_capacity); + } +} + +void update_periodic_maxcap(struct rq *rq) +{ + struct max_capacity_info *__max_cap = (&per_cpu(max_cap, cpu_of(rq))); + unsigned long delta; + + if (!__max_cap) + return; + + delta = __max_cap->max_capacity - __max_cap->cap_capacity; + update_thermal_avg(rq_clock_task(rq), rq, delta); +} diff --git a/kernel/sched/thermal.h b/kernel/sched/thermal.h