On Thu, Jun 30, 2016 at 03:35:47PM -0400, [email protected] wrote:
> diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
> index 3d60e5d76fdb..018bae2ada36 100644
> --- a/kernel/sched/cputime.c
> +++ b/kernel/sched/cputime.c
> @@ -79,40 +79,50 @@ void irqtime_account_irq(struct task_struct *curr)
>  }
>  EXPORT_SYMBOL_GPL(irqtime_account_irq);
>  
> -static int irqtime_account_hi_update(void)
> +static cputime_t irqtime_account_hi_update(cputime_t maxtime)
>  {
>       u64 *cpustat = kcpustat_this_cpu->cpustat;
>       unsigned long flags;
> -     u64 latest_ns;
> -     int ret = 0;
> +     cputime_t irq_cputime;
>  
>       local_irq_save(flags);
> -     latest_ns = this_cpu_read(cpu_hardirq_time);
> -     if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ])
> -             ret = 1;
> +     irq_cputime = nsecs_to_cputime(this_cpu_read(cpu_hardirq_time)) -
> +                   cpustat[CPUTIME_IRQ];

We might want to keep nsecs_to_cputime64(). If cputime_t == jiffies_t ==
unsigned long, we may have a problem after 49 days of interrupts. Arguably
that's a lot of IRQs, but let's be paranoid.

> +     irq_cputime = min(irq_cputime, maxtime);
> +     cpustat[CPUTIME_IRQ] += irq_cputime;
>       local_irq_restore(flags);
> -     return ret;
> +     return irq_cputime;
>  }
>  
> -static int irqtime_account_si_update(void)
> +static cputime_t irqtime_account_si_update(cputime_t maxtime)
>  {
>       u64 *cpustat = kcpustat_this_cpu->cpustat;
>       unsigned long flags;
> -     u64 latest_ns;
> -     int ret = 0;
> +     cputime_t softirq_cputime;
>  
>       local_irq_save(flags);
> -     latest_ns = this_cpu_read(cpu_softirq_time);
> -     if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ])
> -             ret = 1;
> +     softirq_cputime = nsecs_to_cputime(this_cpu_read(cpu_softirq_time)) -

Ditto.

> +                       cpustat[CPUTIME_SOFTIRQ];
> +     softirq_cputime = min(softirq_cputime, maxtime);
> +     cpustat[CPUTIME_SOFTIRQ] += softirq_cputime;
>       local_irq_restore(flags);
> -     return ret;
> +     return softirq_cputime;
>  }
>  
>  #else /* CONFIG_IRQ_TIME_ACCOUNTING */
>  
>  #define sched_clock_irqtime  (0)
>  
> +static cputime_t irqtime_account_hi_update(cputime_t dummy)
> +{
> +     return 0;
> +}
> +
> +static cputime_t irqtime_account_si_update(cputime_t dummy)
> +{
> +     return 0;
> +}
> +
>  #endif /* !CONFIG_IRQ_TIME_ACCOUNTING */
>  
>  static inline void task_group_account_field(struct task_struct *p, int index,
> @@ -257,32 +267,45 @@ void account_idle_time(cputime_t cputime)
>               cpustat[CPUTIME_IDLE] += (__force u64) cputime;
>  }
>  
> -static __always_inline unsigned long steal_account_process_tick(unsigned 
> long max_jiffies)
> +static __always_inline cputime_t steal_account_process_time(cputime_t 
> maxtime)
>  {
>  #ifdef CONFIG_PARAVIRT
>       if (static_key_false(&paravirt_steal_enabled)) {
> +             cputime_t steal_cputime;
>               u64 steal;
> -             unsigned long steal_jiffies;
>  
>               steal = paravirt_steal_clock(smp_processor_id());
>               steal -= this_rq()->prev_steal_time;
> +             this_rq()->prev_steal_time += steal;

We are accounting steal_cputime, but you make it remember steal_nsecs. This is
leaking quite some steal time along the way.

Imagine that cputime_t == jiffies_t and HZ=100 (for simplicity of the example,
say one jiffy corresponds to 100 nsecs).
paravirt_steal_clock() returns 199 nsecs, so prev_steal_time gets those 199
nsecs added. nsecs_to_cputime() returns 1 jiffy (we are one nsec short of the
next jiffy boundary). So account_steal_time() accounts 1 jiffy, and the 99
remaining nsecs are leaked.
If some more steal time is to be accounted on the next tick, those 99 previous
nsecs are forgotten.

A non-leaking sequence would rather be:

        steal = paravirt_steal_clock(smp_processor_id());
        steal -= this_rq()->prev_steal_time;
        steal_cputime = min(nsecs_to_cputime(steal), maxtime);
        account_steal_time(steal_cputime);
        this_rq()->prev_steal_time += cputime_to_nsecs(steal_cputime);

Thanks!

>  
> -             /*
> -              * steal is in nsecs but our caller is expecting steal
> -              * time in jiffies. Lets cast the result to jiffies
> -              * granularity and account the rest on the next rounds.
> -              */
> -             steal_jiffies = min(nsecs_to_jiffies(steal), max_jiffies);
> -             this_rq()->prev_steal_time += jiffies_to_nsecs(steal_jiffies);
> +             steal_cputime = min(nsecs_to_cputime(steal), maxtime);
> +             account_steal_time(steal_cputime);
>  
> -             account_steal_time(jiffies_to_cputime(steal_jiffies));
> -             return steal_jiffies;
> +             return steal_cputime;

Reply via email to