From: Rik van Riel <[email protected]>

The CONFIG_VIRT_CPU_ACCOUNTING_GEN irq time tracking code does not
currently appear to work correctly.

On CPUs that are nohz_full, people typically do not assign IRQs.
On the housekeeping CPU (when a system is booted up with nohz_full),
sampling should work ok to determine irq and softirq time use, but
that only covers the housekeeping CPU itself, not the other
non-nohz_full CPUs.

On CPUs that are nohz_idle (the typical way a distro kernel is
booted), irq time is not accounted at all while the CPU is idle,
due to the lack of timer ticks.

Remove the VTIME_GEN vtime irq time code. The next patch will
allow NO_HZ_FULL kernels to use the IRQ_TIME_ACCOUNTING code.

Signed-off-by: Rik van Riel <[email protected]>
---
 include/linux/vtime.h  | 32 +++++++++++++----------------
 kernel/sched/cputime.c | 55 ++++++++++++++++++++++++++++----------------------
 2 files changed, 45 insertions(+), 42 deletions(-)

diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index fa2196990f84..3b384bf5ce1a 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -64,17 +64,6 @@ extern void vtime_account_system(struct task_struct *tsk);
 extern void vtime_account_idle(struct task_struct *tsk);
 extern void vtime_account_user(struct task_struct *tsk);
 
-#ifdef __ARCH_HAS_VTIME_ACCOUNT
-extern void vtime_account_irq_enter(struct task_struct *tsk);
-#else
-extern void vtime_common_account_irq_enter(struct task_struct *tsk);
-static inline void vtime_account_irq_enter(struct task_struct *tsk)
-{
-       if (vtime_accounting_cpu_enabled())
-               vtime_common_account_irq_enter(tsk);
-}
-#endif /* __ARCH_HAS_VTIME_ACCOUNT */
-
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING */
 
 static inline void vtime_task_switch(struct task_struct *prev) { }
@@ -85,13 +74,8 @@ static inline void vtime_account_irq_enter(struct task_struct *tsk) { }
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 extern void arch_vtime_task_switch(struct task_struct *tsk);
-extern void vtime_gen_account_irq_exit(struct task_struct *tsk);
-
-static inline void vtime_account_irq_exit(struct task_struct *tsk)
-{
-       if (vtime_accounting_cpu_enabled())
-               vtime_gen_account_irq_exit(tsk);
-}
+static inline void vtime_account_irq_enter(struct task_struct *tsk) { }
+static inline void vtime_account_irq_exit(struct task_struct *tsk) { }
 
 extern void vtime_user_enter(struct task_struct *tsk);
 
@@ -113,6 +97,18 @@ static inline void vtime_user_exit(struct task_struct *tsk) { }
 static inline void vtime_guest_enter(struct task_struct *tsk) { }
 static inline void vtime_guest_exit(struct task_struct *tsk) { }
 static inline void vtime_init_idle(struct task_struct *tsk, int cpu) { }
+
+#ifdef __ARCH_HAS_VTIME_ACCOUNT
+extern void vtime_account_irq_enter(struct task_struct *tsk);
+#else
+extern void vtime_common_account_irq_enter(struct task_struct *tsk);
+static inline void vtime_account_irq_enter(struct task_struct *tsk)
+{
+       if (vtime_accounting_cpu_enabled())
+               vtime_common_account_irq_enter(tsk);
+}
+#endif /* __ARCH_HAS_VTIME_ACCOUNT */
+
 #endif
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 4bd6d1b774ab..2f862dfdb520 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -103,10 +103,10 @@ static unsigned long irqtime_account_si_update(void)
        u64 softirq;
 
        local_irq_save(flags);
-       delta = this_cpu_read(cpu_softirq_time) - cpustat[CPUTIME_SOFTIRQ];
-       si_jiffies = cputime_to_jiffies(delta);
+       softirq = this_cpu_read(cpu_softirq_time) - cpustat[CPUTIME_SOFTIRQ];
+       si_jiffies = cputime_to_jiffies(softirq);
        if (si_jiffies)
-               cpustat[CPUSTIME_SOFTIRQ] += jiffies_to_cputime(si_jiffies);
+               cpustat[CPUTIME_SOFTIRQ] += jiffies_to_cputime(si_jiffies);
        local_irq_restore(flags);
        return si_jiffies;
 }
@@ -115,6 +115,16 @@ static unsigned long irqtime_account_si_update(void)
 
 #define sched_clock_irqtime    (0)
 
+static unsigned long irqtime_account_hi_update(void)
+{
+       return 0;
+}
+
+static unsigned long irqtime_account_si_update(void)
+{
+       return 0;
+}
+
 #endif /* !CONFIG_IRQ_TIME_ACCOUNTING */
 
 static inline void task_group_account_field(struct task_struct *p, int index,
@@ -346,7 +356,6 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
 {
        cputime_t scaled = cputime_to_scaled(cputime_one_jiffy);
        u64 cputime = (__force u64) cputime_one_jiffy;
-       u64 *cpustat = kcpustat_this_cpu->cpustat;
        unsigned long other;
 
        /*
@@ -703,16 +712,26 @@ static cputime_t get_vtime_delta(struct task_struct *tsk)
        return jiffies_to_cputime(delta);
 }
 
+/* Account per-cpu irq, softirq, and steal time. Not accounted to a task. */
+static cputime_t __vtime_account_other(void)
+{
+       unsigned long ticks;
+
+       ticks = steal_account_process_tick();
+       ticks += irqtime_account_hi_update();
+       ticks += irqtime_account_si_update();
+
+       return jiffies_to_cputime(ticks);
+}
+
 static void __vtime_account_system(struct task_struct *tsk)
 {
-       cputime_t steal_time;
        cputime_t delta_cpu = get_vtime_delta(tsk);
-       unsigned long delta_st = steal_account_process_tick();
-       steal_time = jiffies_to_cputime(delta_st);
+       cputime_t other = __vtime_account_other();
 
-       if (steal_time >= delta_cpu)
+       if (other >= delta_cpu)
                return;
-       delta_cpu -= steal_time;
+       delta_cpu -= other;
        account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu));
 }
 
@@ -726,16 +745,6 @@ void vtime_account_system(struct task_struct *tsk)
        write_seqcount_end(&tsk->vtime_seqcount);
 }
 
-void vtime_gen_account_irq_exit(struct task_struct *tsk)
-{
-       write_seqcount_begin(&tsk->vtime_seqcount);
-       if (vtime_delta(tsk))
-               __vtime_account_system(tsk);
-       if (context_tracking_in_user())
-               tsk->vtime_snap_whence = VTIME_USER;
-       write_seqcount_end(&tsk->vtime_seqcount);
-}
-
 void vtime_account_user(struct task_struct *tsk)
 {
        cputime_t delta_cpu;
@@ -743,16 +752,14 @@ void vtime_account_user(struct task_struct *tsk)
        write_seqcount_begin(&tsk->vtime_seqcount);
        tsk->vtime_snap_whence = VTIME_SYS;
        if (vtime_delta(tsk)) {
-               cputime_t steal_time;
-               unsigned long delta_st = steal_account_process_tick();
+               cputime_t other = __vtime_account_other();
                delta_cpu = get_vtime_delta(tsk);
-               steal_time = jiffies_to_cputime(delta_st);
 
-               if (steal_time >= delta_cpu) {
+               if (other >= delta_cpu) {
                        write_seqcount_end(&tsk->vtime_seqcount);
                        return;
                }
-               delta_cpu -= steal_time;
+               delta_cpu -= other;
                account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
        }
        write_seqcount_end(&tsk->vtime_seqcount);
-- 
2.5.5

Reply via email to