When a CPU is running a task with core scheduling (coresched) enabled, its sibling is forced idle if the sibling does not have a trusted task to run. Reporting this forced-idle time is useful for understanding the performance of tasks with different cookies throughout the system.
forceidle is added at the last column of /proc/stat: $ cat /proc/stat cpu 102034 0 11992 8347016 1046 0 11 0 0 0 991 cpu0 59 0 212 80364 59 0 0 0 0 0 0 cpu1 72057 0 89 9102 0 0 0 0 0 0 90 So forceidle% can be computed by any user space tools, for example: CPU user% system% iowait% forceidle% idle% cpu53 24.75 0.00 0.00% 0.99% 74.26% CPU user% system% iowait% forceidle% idle% cpu53 25.74 0.00 0.00% 0.99% 73.27% CPU user% system% iowait% forceidle% idle% cpu53 24.75 0.00 0.00% 0.99% 74.26% CPU user% system% iowait% forceidle% idle% cpu53 25.24 0.00 0.00% 3.88% 70.87% Signed-off-by: Aubrey Li <[email protected]> --- fs/proc/stat.c | 48 +++++++++++++++++++++++++++++++++++++ include/linux/kernel_stat.h | 1 + include/linux/tick.h | 2 ++ kernel/time/tick-sched.c | 48 +++++++++++++++++++++++++++++++++++++ kernel/time/tick-sched.h | 3 +++ 5 files changed, 102 insertions(+) diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 46b3293015fe..b27ccac7b5a4 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -28,7 +28,11 @@ static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu) u64 idle; idle = kcs->cpustat[CPUTIME_IDLE]; +#ifdef CONFIG_SCHED_CORE + if (cpu_online(cpu) && !nr_iowait_cpu(cpu) && !cpu_rq(cpu)->core->core_forceidle) +#else if (cpu_online(cpu) && !nr_iowait_cpu(cpu)) +#endif idle += arch_idle_time(cpu); return idle; } @@ -43,6 +47,17 @@ static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu) return iowait; } +#ifdef CONFIG_SCHED_CORE +static u64 get_forceidle_time(struct kernel_cpustat *kcs, int cpu) +{ + u64 forceidle; + + forceidle = kcs->cpustat[CPUTIME_FORCEIDLE]; + if (cpu_online(cpu) && cpu_rq(cpu)->core->core_forceidle) + forceidle += arch_idle_time(cpu); + return forceidle; +} +#endif #else static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu) @@ -77,6 +92,21 @@ static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu) return iowait; } +static u64 get_forceidle_time(struct kernel_cpustat *kcs, int cpu) +{ + u64 forceidle, 
forceidle_usecs = -1ULL; + + if (cpu_online(cpu)) + forceidle_usecs = get_cpu_forceidle_time_us(cpu, NULL); + + if (forceidle_usecs == -1ULL) + /* !NO_HZ or cpu offline so we can rely on cpustat.forceidle */ + forceidle = kcs->cpustat[CPUTIME_FORCEIDLE]; + else + forceidle = forceidle_usecs * NSEC_PER_USEC; + + return forceidle; +} #endif static void show_irq_gap(struct seq_file *p, unsigned int gap) @@ -111,12 +141,18 @@ static int show_stat(struct seq_file *p, void *v) u64 guest, guest_nice; u64 sum = 0; u64 sum_softirq = 0; +#ifdef CONFIG_SCHED_CORE + u64 forceidle; +#endif unsigned int per_softirq_sums[NR_SOFTIRQS] = {0}; struct timespec64 boottime; user = nice = system = idle = iowait = irq = softirq = steal = 0; guest = guest_nice = 0; +#ifdef CONFIG_SCHED_CORE + forceidle = 0; +#endif getboottime64(&boottime); for_each_possible_cpu(i) { @@ -130,6 +166,9 @@ static int show_stat(struct seq_file *p, void *v) system += cpustat[CPUTIME_SYSTEM]; idle += get_idle_time(&kcpustat, i); iowait += get_iowait_time(&kcpustat, i); +#ifdef CONFIG_SCHED_CORE + forceidle += get_forceidle_time(&kcpustat, i); +#endif irq += cpustat[CPUTIME_IRQ]; softirq += cpustat[CPUTIME_SOFTIRQ]; steal += cpustat[CPUTIME_STEAL]; @@ -157,6 +196,9 @@ static int show_stat(struct seq_file *p, void *v) seq_put_decimal_ull(p, " ", nsec_to_clock_t(steal)); seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest)); seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest_nice)); +#ifdef CONFIG_SCHED_CORE + seq_put_decimal_ull(p, " ", nsec_to_clock_t(forceidle)); +#endif seq_putc(p, '\n'); for_each_online_cpu(i) { @@ -171,6 +213,9 @@ static int show_stat(struct seq_file *p, void *v) system = cpustat[CPUTIME_SYSTEM]; idle = get_idle_time(&kcpustat, i); iowait = get_iowait_time(&kcpustat, i); +#ifdef CONFIG_SCHED_CORE + forceidle = get_forceidle_time(&kcpustat, i); +#endif irq = cpustat[CPUTIME_IRQ]; softirq = cpustat[CPUTIME_SOFTIRQ]; steal = cpustat[CPUTIME_STEAL]; @@ -187,6 +232,9 @@ static int show_stat(struct 
seq_file *p, void *v) seq_put_decimal_ull(p, " ", nsec_to_clock_t(steal)); seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest)); seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest_nice)); +#ifdef CONFIG_SCHED_CORE + seq_put_decimal_ull(p, " ", nsec_to_clock_t(forceidle)); +#endif seq_putc(p, '\n'); } seq_put_decimal_ull(p, "intr ", (unsigned long long)sum); diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 89f0745c096d..c7ce4bfe757e 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -25,6 +25,7 @@ enum cpu_usage_stat { CPUTIME_IRQ, CPUTIME_IDLE, CPUTIME_IOWAIT, + CPUTIME_FORCEIDLE, CPUTIME_STEAL, CPUTIME_GUEST, CPUTIME_GUEST_NICE, diff --git a/include/linux/tick.h b/include/linux/tick.h index 7340613c7eff..7fce78f46930 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -139,6 +139,7 @@ extern unsigned long tick_nohz_get_idle_calls(void); extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); +extern u64 get_cpu_forceidle_time_us(int cpu, u64 *last_update_time); static inline void tick_nohz_idle_stop_tick_protected(void) { @@ -169,6 +170,7 @@ static inline ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next) } static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } +static inline u64 get_cpu_forceidle_time_us(int cpu, u64 *unused) { return -1; } static inline void tick_nohz_idle_stop_tick_protected(void) { } #endif /* !CONFIG_NO_HZ_COMMON */ diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 1b734070f028..de94e5bab5a1 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -29,6 +29,7 @@ #include <asm/irq_regs.h> #include "tick-internal.h" +#include "../sched/sched.h" #include <trace/events/timer.h> @@ -547,6 +548,10 @@ 
update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_upda delta = ktime_sub(now, ts->idle_entrytime); if (nr_iowait_cpu(cpu) > 0) ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta); +#ifdef CONFIG_SCHED_CORE + else if (cpu_rq(cpu)->core->core_forceidle) + ts->forceidle_sleeptime = ktime_add(ts->forceidle_sleeptime, delta); +#endif else ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); ts->idle_entrytime = now; @@ -653,6 +658,49 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) } EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); +#ifdef CONFIG_SCHED_CORE +/** + * get_cpu_forceidle_time_us - get the total force idle time of a CPU + * @cpu: CPU number to query + * @last_update_time: variable to store update time in. Do not update + * counters if NULL. + * + * Return the cumulative force idle time (since boot) for a given + * CPU, in microseconds. + * + * This time is measured via accounting rather than sampling, + * and is as accurate as ktime_get() is. + * + * This function returns -1 if NOHZ is not enabled. 
+ */ +u64 get_cpu_forceidle_time_us(int cpu, u64 *last_update_time) +{ + struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + ktime_t now, forceidle; + + if (!tick_nohz_active) + return -1; + + now = ktime_get(); + if (last_update_time) { + update_ts_time_stats(cpu, ts, now, last_update_time); + forceidle = ts->forceidle_sleeptime; + } else { + if (ts->idle_active && cpu_rq(cpu)->core->core_forceidle) { + ktime_t delta = ktime_sub(now, ts->idle_entrytime); + + forceidle = ktime_add(ts->forceidle_sleeptime, delta); + } else { + forceidle = ts->forceidle_sleeptime; + } + } + + return ktime_to_us(forceidle); + +} +EXPORT_SYMBOL_GPL(get_cpu_forceidle_time_us); +#endif + static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) { hrtimer_cancel(&ts->sched_timer); diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index 4fb06527cf64..4c00c5399055 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -71,6 +71,9 @@ struct tick_sched { ktime_t idle_exittime; ktime_t idle_sleeptime; ktime_t iowait_sleeptime; +#ifdef CONFIG_SCHED_CORE + ktime_t forceidle_sleeptime; +#endif unsigned long last_jiffies; u64 timer_expires; u64 timer_expires_base; -- 2.17.1

