From: Rik van Riel <[email protected]> The CONFIG_VIRT_CPU_ACCOUNTING_GEN irq time tracking code does not currently appear to work correctly.
On CPUs that are nohz_full, people typically do not assign IRQs. On the housekeeping CPU (when a system is booted up with nohz_full), sampling should work ok to determine irq and softirq time use, but that only covers the housekeeping CPU itself, not the other non-nohz_full CPUs. On CPUs that are nohz_idle (the typical way a distro kernel is booted), irq time is not accounted at all while the CPU is idle, due to the lack of timer ticks. Remove the VTIME_GEN vtime irq time code. The next patch will allow NO_HZ_FULL kernels to use the IRQ_TIME_ACCOUNTING code. Signed-off-by: Rik van Riel <[email protected]> --- include/linux/vtime.h | 32 +++++++++++++---------------- kernel/sched/cputime.c | 55 ++++++++++++++++++++++++++++---------------------- 2 files changed, 45 insertions(+), 42 deletions(-) diff --git a/include/linux/vtime.h b/include/linux/vtime.h index fa2196990f84..3b384bf5ce1a 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -64,17 +64,6 @@ extern void vtime_account_system(struct task_struct *tsk); extern void vtime_account_idle(struct task_struct *tsk); extern void vtime_account_user(struct task_struct *tsk); -#ifdef __ARCH_HAS_VTIME_ACCOUNT -extern void vtime_account_irq_enter(struct task_struct *tsk); -#else -extern void vtime_common_account_irq_enter(struct task_struct *tsk); -static inline void vtime_account_irq_enter(struct task_struct *tsk) -{ - if (vtime_accounting_cpu_enabled()) - vtime_common_account_irq_enter(tsk); -} -#endif /* __ARCH_HAS_VTIME_ACCOUNT */ - #else /* !CONFIG_VIRT_CPU_ACCOUNTING */ static inline void vtime_task_switch(struct task_struct *prev) { } @@ -85,13 +74,8 @@ static inline void vtime_account_irq_enter(struct task_struct *tsk) { } #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN extern void arch_vtime_task_switch(struct task_struct *tsk); -extern void vtime_gen_account_irq_exit(struct task_struct *tsk); - -static inline void vtime_account_irq_exit(struct task_struct *tsk) -{ - if (vtime_accounting_cpu_enabled()) - 
vtime_gen_account_irq_exit(tsk); -} +static inline void vtime_account_irq_enter(struct task_struct *tsk) { } +static inline void vtime_account_irq_exit(struct task_struct *tsk) { } extern void vtime_user_enter(struct task_struct *tsk); @@ -113,6 +97,18 @@ static inline void vtime_user_exit(struct task_struct *tsk) { } static inline void vtime_guest_enter(struct task_struct *tsk) { } static inline void vtime_guest_exit(struct task_struct *tsk) { } static inline void vtime_init_idle(struct task_struct *tsk, int cpu) { } + +#ifdef __ARCH_HAS_VTIME_ACCOUNT +extern void vtime_account_irq_enter(struct task_struct *tsk); +#else +extern void vtime_common_account_irq_enter(struct task_struct *tsk); +static inline void vtime_account_irq_enter(struct task_struct *tsk) +{ + if (vtime_accounting_cpu_enabled()) + vtime_common_account_irq_enter(tsk); +} +#endif /* __ARCH_HAS_VTIME_ACCOUNT */ + #endif #ifdef CONFIG_IRQ_TIME_ACCOUNTING diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 4bd6d1b774ab..2f862dfdb520 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -103,10 +103,10 @@ static unsigned long irqtime_account_si_update(void) u64 softirq; local_irq_save(flags); - delta = this_cpu_read(cpu_softirq_time) - cpustat[CPUTIME_SOFTIRQ]; - si_jiffies = cputime_to_jiffies(delta); + softirq = this_cpu_read(cpu_softirq_time) - cpustat[CPUTIME_SOFTIRQ]; + si_jiffies = cputime_to_jiffies(softirq); if (si_jiffies) - cpustat[CPUSTIME_SOFTIRQ] += jiffies_to_cputime(si_jiffies); + cpustat[CPUTIME_SOFTIRQ] += jiffies_to_cputime(si_jiffies); local_irq_restore(flags); return si_jiffies; } @@ -115,6 +115,16 @@ static unsigned long irqtime_account_si_update(void) #define sched_clock_irqtime (0) +static unsigned long irqtime_account_hi_update(void) +{ + return 0; +} + +static unsigned long irqtime_account_si_update(void) +{ + return 0; +} + #endif /* !CONFIG_IRQ_TIME_ACCOUNTING */ static inline void task_group_account_field(struct task_struct *p, int index, @@ 
-346,7 +356,6 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick, { cputime_t scaled = cputime_to_scaled(cputime_one_jiffy); u64 cputime = (__force u64) cputime_one_jiffy; - u64 *cpustat = kcpustat_this_cpu->cpustat; unsigned long other; /* @@ -703,16 +712,26 @@ static cputime_t get_vtime_delta(struct task_struct *tsk) return jiffies_to_cputime(delta); } +/* Account per-cpu irq, softirq, and steal time. Not accounted to a task. */ +static cputime_t __vtime_account_other(void) +{ + unsigned long ticks; + + ticks = steal_account_process_tick(); + ticks += irqtime_account_hi_update(); + ticks += irqtime_account_si_update(); + + return jiffies_to_cputime(ticks); +} + static void __vtime_account_system(struct task_struct *tsk) { - cputime_t steal_time; cputime_t delta_cpu = get_vtime_delta(tsk); - unsigned long delta_st = steal_account_process_tick(); - steal_time = jiffies_to_cputime(delta_st); + cputime_t other = __vtime_account_other(); - if (steal_time >= delta_cpu) + if (other >= delta_cpu) return; - delta_cpu -= steal_time; + delta_cpu -= other; account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu)); } @@ -726,16 +745,6 @@ void vtime_account_system(struct task_struct *tsk) write_seqcount_end(&tsk->vtime_seqcount); } -void vtime_gen_account_irq_exit(struct task_struct *tsk) -{ - write_seqcount_begin(&tsk->vtime_seqcount); - if (vtime_delta(tsk)) - __vtime_account_system(tsk); - if (context_tracking_in_user()) - tsk->vtime_snap_whence = VTIME_USER; - write_seqcount_end(&tsk->vtime_seqcount); -} - void vtime_account_user(struct task_struct *tsk) { cputime_t delta_cpu; @@ -743,16 +752,14 @@ void vtime_account_user(struct task_struct *tsk) write_seqcount_begin(&tsk->vtime_seqcount); tsk->vtime_snap_whence = VTIME_SYS; if (vtime_delta(tsk)) { - cputime_t steal_time; - unsigned long delta_st = steal_account_process_tick(); + cputime_t other = __vtime_account_other(); delta_cpu = get_vtime_delta(tsk); - steal_time = 
jiffies_to_cputime(delta_st); - if (steal_time >= delta_cpu) { + if (other >= delta_cpu) { write_seqcount_end(&tsk->vtime_seqcount); return; } - delta_cpu -= steal_time; + delta_cpu -= other; account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu)); } write_seqcount_end(&tsk->vtime_seqcount); -- 2.5.5

