Re: [RFC PATCH 2/2] softirq: Per vector thread deferment

2018-01-12 Thread Frederic Weisbecker
On Fri, Jan 12, 2018 at 10:07:25AM +0100, Paolo Abeni wrote:
> On Fri, 2018-01-12 at 06:35 +0100, Frederic Weisbecker wrote:
> > Some softirq vectors can be more CPU hungry than others. Especially
> > networking may sometimes deal with packet storm and need more CPU than
> > IRQ tail can offer without inducing scheduler latencies. In this case
> > the current code defers to ksoftirqd that behaves nicer. Now this nice
> > behaviour can be bad for other IRQ vectors that usually need quick
> > processing.
> > 
> > To solve this we only defer to threading the vectors that outreached the
> > time limit on IRQ tail processing and leave the others inline on real
> > Soft-IRQs service. This is achieved using workqueues with
> > per-CPU/per-vector worklets.
> > 
> > Note ksoftirqd is not removed as it is still needed for threaded IRQs
> > mode.
> > 
> > Suggested-by: Linus Torvalds 
> > Signed-off-by: Frederic Weisbecker 
> > Cc: Dmitry Safonov 
> > Cc: Eric Dumazet 
> > Cc: Linus Torvalds 
> > Cc: Peter Zijlstra 
> > Cc: Andrew Morton 
> > Cc: David Miller 
> > Cc: Hannes Frederic Sowa 
> > Cc: Ingo Molnar 
> > Cc: Levin Alexander 
> > Cc: Paolo Abeni 
> > Cc: Paul E. McKenney 
> > Cc: Radu Rendec 
> > Cc: Rik van Riel 
> > Cc: Stanislaw Gruszka 
> > Cc: Thomas Gleixner 
> > Cc: Wanpeng Li 
> > ---
> >  kernel/softirq.c | 90 
> > ++--
> >  1 file changed, 87 insertions(+), 3 deletions(-)
> > 
> > diff --git a/kernel/softirq.c b/kernel/softirq.c
> > index fa267f7..0c817ec6 100644
> > --- a/kernel/softirq.c
> > +++ b/kernel/softirq.c
> > @@ -74,6 +74,13 @@ struct softirq_stat {
> >  
> >  static DEFINE_PER_CPU(struct softirq_stat, softirq_stat_cpu);
> >  
> > +struct vector_work {
> > +   int vec;
> > +   struct work_struct work;
> > +};
> > +
> > +static DEFINE_PER_CPU(struct vector_work[NR_SOFTIRQS], vector_work_cpu);
> > +
> >  /*
> >   * we cannot loop indefinitely here to avoid userspace starvation,
> >   * but we also don't want to introduce a worst case 1/HZ latency
> > @@ -251,6 +258,70 @@ static inline bool lockdep_softirq_start(void) { 
> > return false; }
> >  static inline void lockdep_softirq_end(bool in_hardirq) { }
> >  #endif
> >  
> > +static void vector_work_func(struct work_struct *work)
> > +{
> > +   struct vector_work *vector_work;
> > +   u32 pending;
> > +   int vec;
> > +
> > +   vector_work = container_of(work, struct vector_work, work);
> > +   vec = vector_work->vec;
> > +
> > +   local_irq_disable();
> > +   pending = local_softirq_pending();
> > +   account_irq_enter_time(current);
> > +   __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
> > +   lockdep_softirq_enter();
> > +   set_softirq_pending(pending & ~(1 << vec));
> > +   local_irq_enable();
> > +
> > +   if (pending & (1 << vec)) {
> > +   struct softirq_action *sa = _vec[vec];
> > +
> > +   kstat_incr_softirqs_this_cpu(vec);
> > +   trace_softirq_entry(vec);
> > +   sa->action(sa);
> > +   trace_softirq_exit(vec);
> > +   }
> > +
> > +   local_irq_disable();
> > +
> > +   pending = local_softirq_pending();
> > +   if (pending & (1 << vec))
> > +   schedule_work_on(smp_processor_id(), work);
> 
> If we check for the overrun condition here, as done in the
> __do_softirq() main loop, we could avoid ksoftirqd completely and
> probably have less code duplication.

Yes that could be possible indeed. I guess having workqueues serializing
vector works is not much different that what ksoftirqd does.

I can try that.

> 
> > +
> > +   lockdep_softirq_exit();
> > +   account_irq_exit_time(current);
> > +   __local_bh_enable(SOFTIRQ_OFFSET);
> > +   local_irq_enable();
> > +}
> > +
> > +static int do_softirq_overrun(u32 overrun, u32 pending)
> > +{
> > +   struct softirq_action *h = softirq_vec;
> > +   int softirq_bit;
> > +
> > +   if (!overrun)
> > +   return pending;
> > +
> > +   overrun &= pending;
> > +   pending &= ~overrun;
> > +
> > +   while ((softirq_bit = ffs(overrun))) {
> > +   struct vector_work *work;
> > +   unsigned int vec_nr;
> > +
> > +   h += softirq_bit - 1;
> > +   vec_nr = h - softirq_vec;
> > +   work = this_cpu_ptr(_work_cpu[vec_nr]);
> > +   schedule_work_on(smp_processor_id(), >work);
> > +   h++;
> > +   overrun >>= softirq_bit;
> > +   }
> > +
> > +   return pending;
> > +}
> > +
> >  asmlinkage __visible void __softirq_entry __do_softirq(void)
> >  {
> > struct softirq_stat *sstat = this_cpu_ptr(_stat_cpu);
> > @@ -321,10 

Re: [RFC PATCH 2/2] softirq: Per vector thread deferment

2018-01-12 Thread Frederic Weisbecker
On Fri, Jan 12, 2018 at 10:07:25AM +0100, Paolo Abeni wrote:
> On Fri, 2018-01-12 at 06:35 +0100, Frederic Weisbecker wrote:
> > Some softirq vectors can be more CPU hungry than others. Especially
> > networking may sometimes deal with packet storm and need more CPU than
> > IRQ tail can offer without inducing scheduler latencies. In this case
> > the current code defers to ksoftirqd that behaves nicer. Now this nice
> > behaviour can be bad for other IRQ vectors that usually need quick
> > processing.
> > 
> > To solve this we only defer to threading the vectors that outreached the
> > time limit on IRQ tail processing and leave the others inline on real
> > Soft-IRQs service. This is achieved using workqueues with
> > per-CPU/per-vector worklets.
> > 
> > Note ksoftirqd is not removed as it is still needed for threaded IRQs
> > mode.
> > 
> > Suggested-by: Linus Torvalds 
> > Signed-off-by: Frederic Weisbecker 
> > Cc: Dmitry Safonov 
> > Cc: Eric Dumazet 
> > Cc: Linus Torvalds 
> > Cc: Peter Zijlstra 
> > Cc: Andrew Morton 
> > Cc: David Miller 
> > Cc: Hannes Frederic Sowa 
> > Cc: Ingo Molnar 
> > Cc: Levin Alexander 
> > Cc: Paolo Abeni 
> > Cc: Paul E. McKenney 
> > Cc: Radu Rendec 
> > Cc: Rik van Riel 
> > Cc: Stanislaw Gruszka 
> > Cc: Thomas Gleixner 
> > Cc: Wanpeng Li 
> > ---
> >  kernel/softirq.c | 90 
> > ++--
> >  1 file changed, 87 insertions(+), 3 deletions(-)
> > 
> > diff --git a/kernel/softirq.c b/kernel/softirq.c
> > index fa267f7..0c817ec6 100644
> > --- a/kernel/softirq.c
> > +++ b/kernel/softirq.c
> > @@ -74,6 +74,13 @@ struct softirq_stat {
> >  
> >  static DEFINE_PER_CPU(struct softirq_stat, softirq_stat_cpu);
> >  
> > +struct vector_work {
> > +   int vec;
> > +   struct work_struct work;
> > +};
> > +
> > +static DEFINE_PER_CPU(struct vector_work[NR_SOFTIRQS], vector_work_cpu);
> > +
> >  /*
> >   * we cannot loop indefinitely here to avoid userspace starvation,
> >   * but we also don't want to introduce a worst case 1/HZ latency
> > @@ -251,6 +258,70 @@ static inline bool lockdep_softirq_start(void) { 
> > return false; }
> >  static inline void lockdep_softirq_end(bool in_hardirq) { }
> >  #endif
> >  
> > +static void vector_work_func(struct work_struct *work)
> > +{
> > +   struct vector_work *vector_work;
> > +   u32 pending;
> > +   int vec;
> > +
> > +   vector_work = container_of(work, struct vector_work, work);
> > +   vec = vector_work->vec;
> > +
> > +   local_irq_disable();
> > +   pending = local_softirq_pending();
> > +   account_irq_enter_time(current);
> > +   __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
> > +   lockdep_softirq_enter();
> > +   set_softirq_pending(pending & ~(1 << vec));
> > +   local_irq_enable();
> > +
> > +   if (pending & (1 << vec)) {
> > +   struct softirq_action *sa = _vec[vec];
> > +
> > +   kstat_incr_softirqs_this_cpu(vec);
> > +   trace_softirq_entry(vec);
> > +   sa->action(sa);
> > +   trace_softirq_exit(vec);
> > +   }
> > +
> > +   local_irq_disable();
> > +
> > +   pending = local_softirq_pending();
> > +   if (pending & (1 << vec))
> > +   schedule_work_on(smp_processor_id(), work);
> 
> If we check for the overrun condition here, as done in the
> __do_softirq() main loop, we could avoid ksoftirqd completely and
> probably have less code duplication.

Yes that could be possible indeed. I guess having workqueues serializing
vector works is not much different that what ksoftirqd does.

I can try that.

> 
> > +
> > +   lockdep_softirq_exit();
> > +   account_irq_exit_time(current);
> > +   __local_bh_enable(SOFTIRQ_OFFSET);
> > +   local_irq_enable();
> > +}
> > +
> > +static int do_softirq_overrun(u32 overrun, u32 pending)
> > +{
> > +   struct softirq_action *h = softirq_vec;
> > +   int softirq_bit;
> > +
> > +   if (!overrun)
> > +   return pending;
> > +
> > +   overrun &= pending;
> > +   pending &= ~overrun;
> > +
> > +   while ((softirq_bit = ffs(overrun))) {
> > +   struct vector_work *work;
> > +   unsigned int vec_nr;
> > +
> > +   h += softirq_bit - 1;
> > +   vec_nr = h - softirq_vec;
> > +   work = this_cpu_ptr(_work_cpu[vec_nr]);
> > +   schedule_work_on(smp_processor_id(), >work);
> > +   h++;
> > +   overrun >>= softirq_bit;
> > +   }
> > +
> > +   return pending;
> > +}
> > +
> >  asmlinkage __visible void __softirq_entry __do_softirq(void)
> >  {
> > struct softirq_stat *sstat = this_cpu_ptr(_stat_cpu);
> > @@ -321,10 +392,13 @@ asmlinkage __visible void __softirq_entry 
> > __do_softirq(void)
> >  
> > pending = local_softirq_pending();
> > if (pending) {
> > -   if (overrun || need_resched())
> > +   if (need_resched()) {
> > wakeup_softirqd();
> > -   else
> > -   goto restart;
> > +   } else {
> > +   pending = 

Re: [RFC PATCH 2/2] softirq: Per vector thread deferment

2018-01-12 Thread Paolo Abeni
On Fri, 2018-01-12 at 06:35 +0100, Frederic Weisbecker wrote:
> Some softirq vectors can be more CPU hungry than others. Especially
> networking may sometimes deal with packet storm and need more CPU than
> IRQ tail can offer without inducing scheduler latencies. In this case
> the current code defers to ksoftirqd that behaves nicer. Now this nice
> behaviour can be bad for other IRQ vectors that usually need quick
> processing.
> 
> To solve this we only defer to threading the vectors that outreached the
> time limit on IRQ tail processing and leave the others inline on real
> Soft-IRQs service. This is achieved using workqueues with
> per-CPU/per-vector worklets.
> 
> Note ksoftirqd is not removed as it is still needed for threaded IRQs
> mode.
> 
> Suggested-by: Linus Torvalds 
> Signed-off-by: Frederic Weisbecker 
> Cc: Dmitry Safonov 
> Cc: Eric Dumazet 
> Cc: Linus Torvalds 
> Cc: Peter Zijlstra 
> Cc: Andrew Morton 
> Cc: David Miller 
> Cc: Hannes Frederic Sowa 
> Cc: Ingo Molnar 
> Cc: Levin Alexander 
> Cc: Paolo Abeni 
> Cc: Paul E. McKenney 
> Cc: Radu Rendec 
> Cc: Rik van Riel 
> Cc: Stanislaw Gruszka 
> Cc: Thomas Gleixner 
> Cc: Wanpeng Li 
> ---
>  kernel/softirq.c | 90 
> ++--
>  1 file changed, 87 insertions(+), 3 deletions(-)
> 
> diff --git a/kernel/softirq.c b/kernel/softirq.c
> index fa267f7..0c817ec6 100644
> --- a/kernel/softirq.c
> +++ b/kernel/softirq.c
> @@ -74,6 +74,13 @@ struct softirq_stat {
>  
>  static DEFINE_PER_CPU(struct softirq_stat, softirq_stat_cpu);
>  
> +struct vector_work {
> + int vec;
> + struct work_struct work;
> +};
> +
> +static DEFINE_PER_CPU(struct vector_work[NR_SOFTIRQS], vector_work_cpu);
> +
>  /*
>   * we cannot loop indefinitely here to avoid userspace starvation,
>   * but we also don't want to introduce a worst case 1/HZ latency
> @@ -251,6 +258,70 @@ static inline bool lockdep_softirq_start(void) { return 
> false; }
>  static inline void lockdep_softirq_end(bool in_hardirq) { }
>  #endif
>  
> +static void vector_work_func(struct work_struct *work)
> +{
> + struct vector_work *vector_work;
> + u32 pending;
> + int vec;
> +
> + vector_work = container_of(work, struct vector_work, work);
> + vec = vector_work->vec;
> +
> + local_irq_disable();
> + pending = local_softirq_pending();
> + account_irq_enter_time(current);
> + __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
> + lockdep_softirq_enter();
> + set_softirq_pending(pending & ~(1 << vec));
> + local_irq_enable();
> +
> + if (pending & (1 << vec)) {
> + struct softirq_action *sa = _vec[vec];
> +
> + kstat_incr_softirqs_this_cpu(vec);
> + trace_softirq_entry(vec);
> + sa->action(sa);
> + trace_softirq_exit(vec);
> + }
> +
> + local_irq_disable();
> +
> + pending = local_softirq_pending();
> + if (pending & (1 << vec))
> + schedule_work_on(smp_processor_id(), work);

If we check for the overrun condition here, as done in the
__do_softirq() main loop, we could avoid ksoftirqd completely and
probably have less code duplication.

> +
> + lockdep_softirq_exit();
> + account_irq_exit_time(current);
> + __local_bh_enable(SOFTIRQ_OFFSET);
> + local_irq_enable();
> +}
> +
> +static int do_softirq_overrun(u32 overrun, u32 pending)
> +{
> + struct softirq_action *h = softirq_vec;
> + int softirq_bit;
> +
> + if (!overrun)
> + return pending;
> +
> + overrun &= pending;
> + pending &= ~overrun;
> +
> + while ((softirq_bit = ffs(overrun))) {
> + struct vector_work *work;
> + unsigned int vec_nr;
> +
> + h += softirq_bit - 1;
> + vec_nr = h - softirq_vec;
> + work = this_cpu_ptr(_work_cpu[vec_nr]);
> + schedule_work_on(smp_processor_id(), >work);
> + h++;
> + overrun >>= softirq_bit;
> + }
> +
> + return pending;
> +}
> +
>  asmlinkage __visible void __softirq_entry __do_softirq(void)
>  {
>   struct softirq_stat *sstat = this_cpu_ptr(_stat_cpu);
> @@ -321,10 +392,13 @@ asmlinkage __visible void __softirq_entry 
> __do_softirq(void)
>  
>   pending = local_softirq_pending();
>   if (pending) {
> - if (overrun || need_resched())
> + if (need_resched()) {
>   wakeup_softirqd();
> - else
> - goto restart;
> + } else {
> + pending = 

Re: [RFC PATCH 2/2] softirq: Per vector thread deferment

2018-01-12 Thread Paolo Abeni
On Fri, 2018-01-12 at 06:35 +0100, Frederic Weisbecker wrote:
> Some softirq vectors can be more CPU hungry than others. Especially
> networking may sometimes deal with packet storm and need more CPU than
> IRQ tail can offer without inducing scheduler latencies. In this case
> the current code defers to ksoftirqd that behaves nicer. Now this nice
> behaviour can be bad for other IRQ vectors that usually need quick
> processing.
> 
> To solve this we only defer to threading the vectors that outreached the
> time limit on IRQ tail processing and leave the others inline on real
> Soft-IRQs service. This is achieved using workqueues with
> per-CPU/per-vector worklets.
> 
> Note ksoftirqd is not removed as it is still needed for threaded IRQs
> mode.
> 
> Suggested-by: Linus Torvalds 
> Signed-off-by: Frederic Weisbecker 
> Cc: Dmitry Safonov 
> Cc: Eric Dumazet 
> Cc: Linus Torvalds 
> Cc: Peter Zijlstra 
> Cc: Andrew Morton 
> Cc: David Miller 
> Cc: Hannes Frederic Sowa 
> Cc: Ingo Molnar 
> Cc: Levin Alexander 
> Cc: Paolo Abeni 
> Cc: Paul E. McKenney 
> Cc: Radu Rendec 
> Cc: Rik van Riel 
> Cc: Stanislaw Gruszka 
> Cc: Thomas Gleixner 
> Cc: Wanpeng Li 
> ---
>  kernel/softirq.c | 90 
> ++--
>  1 file changed, 87 insertions(+), 3 deletions(-)
> 
> diff --git a/kernel/softirq.c b/kernel/softirq.c
> index fa267f7..0c817ec6 100644
> --- a/kernel/softirq.c
> +++ b/kernel/softirq.c
> @@ -74,6 +74,13 @@ struct softirq_stat {
>  
>  static DEFINE_PER_CPU(struct softirq_stat, softirq_stat_cpu);
>  
> +struct vector_work {
> + int vec;
> + struct work_struct work;
> +};
> +
> +static DEFINE_PER_CPU(struct vector_work[NR_SOFTIRQS], vector_work_cpu);
> +
>  /*
>   * we cannot loop indefinitely here to avoid userspace starvation,
>   * but we also don't want to introduce a worst case 1/HZ latency
> @@ -251,6 +258,70 @@ static inline bool lockdep_softirq_start(void) { return 
> false; }
>  static inline void lockdep_softirq_end(bool in_hardirq) { }
>  #endif
>  
> +static void vector_work_func(struct work_struct *work)
> +{
> + struct vector_work *vector_work;
> + u32 pending;
> + int vec;
> +
> + vector_work = container_of(work, struct vector_work, work);
> + vec = vector_work->vec;
> +
> + local_irq_disable();
> + pending = local_softirq_pending();
> + account_irq_enter_time(current);
> + __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
> + lockdep_softirq_enter();
> + set_softirq_pending(pending & ~(1 << vec));
> + local_irq_enable();
> +
> + if (pending & (1 << vec)) {
> + struct softirq_action *sa = _vec[vec];
> +
> + kstat_incr_softirqs_this_cpu(vec);
> + trace_softirq_entry(vec);
> + sa->action(sa);
> + trace_softirq_exit(vec);
> + }
> +
> + local_irq_disable();
> +
> + pending = local_softirq_pending();
> + if (pending & (1 << vec))
> + schedule_work_on(smp_processor_id(), work);

If we check for the overrun condition here, as done in the
__do_softirq() main loop, we could avoid ksoftirqd completely and
probably have less code duplication.

> +
> + lockdep_softirq_exit();
> + account_irq_exit_time(current);
> + __local_bh_enable(SOFTIRQ_OFFSET);
> + local_irq_enable();
> +}
> +
> +static int do_softirq_overrun(u32 overrun, u32 pending)
> +{
> + struct softirq_action *h = softirq_vec;
> + int softirq_bit;
> +
> + if (!overrun)
> + return pending;
> +
> + overrun &= pending;
> + pending &= ~overrun;
> +
> + while ((softirq_bit = ffs(overrun))) {
> + struct vector_work *work;
> + unsigned int vec_nr;
> +
> + h += softirq_bit - 1;
> + vec_nr = h - softirq_vec;
> + work = this_cpu_ptr(_work_cpu[vec_nr]);
> + schedule_work_on(smp_processor_id(), >work);
> + h++;
> + overrun >>= softirq_bit;
> + }
> +
> + return pending;
> +}
> +
>  asmlinkage __visible void __softirq_entry __do_softirq(void)
>  {
>   struct softirq_stat *sstat = this_cpu_ptr(_stat_cpu);
> @@ -321,10 +392,13 @@ asmlinkage __visible void __softirq_entry 
> __do_softirq(void)
>  
>   pending = local_softirq_pending();
>   if (pending) {
> - if (overrun || need_resched())
> + if (need_resched()) {
>   wakeup_softirqd();
> - else
> - goto restart;
> + } else {
> + pending = do_softirq_overrun(overrun, pending);
> + if (pending)
> + goto restart;
> + }
>   }
>  
>   lockdep_softirq_end(in_hardirq);

This way the 'overrun' branch is not triggered if we (also) need
resched, should we test for overrun first ?

Cheers,

Paolo


Re: [RFC PATCH 2/2] softirq: Per vector thread deferment

2018-01-11 Thread Frederic Weisbecker
On Fri, Jan 12, 2018 at 06:35:54AM +0100, Frederic Weisbecker wrote:
> Some softirq vectors can be more CPU hungry than others. Especially
> networking may sometimes deal with packet storm and need more CPU than
> IRQ tail can offer without inducing scheduler latencies. In this case
> the current code defers to ksoftirqd that behaves nicer. Now this nice
> behaviour can be bad for other IRQ vectors that usually need quick
> processing.
> 
> To solve this we only defer to threading the vectors that outreached the
> time limit on IRQ tail processing and leave the others inline on real
> Soft-IRQs service. This is achieved using workqueues with
> per-CPU/per-vector worklets.
> 
> Note ksoftirqd is not removed as it is still needed for threaded IRQs
> mode.
> 
> Suggested-by: Linus Torvalds 
> Signed-off-by: Frederic Weisbecker 
> Cc: Dmitry Safonov 
> Cc: Eric Dumazet 
> Cc: Linus Torvalds 
> Cc: Peter Zijlstra 
> Cc: Andrew Morton 
> Cc: David Miller 
> Cc: Hannes Frederic Sowa 
> Cc: Ingo Molnar 
> Cc: Levin Alexander 
> Cc: Paolo Abeni 
> Cc: Paul E. McKenney 
> Cc: Radu Rendec 
> Cc: Rik van Riel 
> Cc: Stanislaw Gruszka 
> Cc: Thomas Gleixner 
> Cc: Wanpeng Li 
> ---
>  kernel/softirq.c | 90 
> ++--
>  1 file changed, 87 insertions(+), 3 deletions(-)
> 
> diff --git a/kernel/softirq.c b/kernel/softirq.c
> index fa267f7..0c817ec6 100644
> --- a/kernel/softirq.c
> +++ b/kernel/softirq.c
> @@ -74,6 +74,13 @@ struct softirq_stat {
>  
>  static DEFINE_PER_CPU(struct softirq_stat, softirq_stat_cpu);
>  
> +struct vector_work {
> + int vec;
> + struct work_struct work;
> +};
> +
> +static DEFINE_PER_CPU(struct vector_work[NR_SOFTIRQS], vector_work_cpu);
> +
>  /*
>   * we cannot loop indefinitely here to avoid userspace starvation,
>   * but we also don't want to introduce a worst case 1/HZ latency
> @@ -251,6 +258,70 @@ static inline bool lockdep_softirq_start(void) { return 
> false; }
>  static inline void lockdep_softirq_end(bool in_hardirq) { }
>  #endif
>  
> +static void vector_work_func(struct work_struct *work)
> +{
> + struct vector_work *vector_work;
> + u32 pending;
> + int vec;
> +
> + vector_work = container_of(work, struct vector_work, work);
> + vec = vector_work->vec;
> +
> + local_irq_disable();
> + pending = local_softirq_pending();
> + account_irq_enter_time(current);
> + __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
> + lockdep_softirq_enter();
> + set_softirq_pending(pending & ~(1 << vec));
> + local_irq_enable();
> +
> + if (pending & (1 << vec)) {

Ah I see the problem. Say in do_softirq() we had pending VECTOR 1 and 2.
And we had overrun only VECTOR 1 so VECTOR 1 is enqueued to workqueue.
Right after that we go back to the restart loop in do_softirq in order to
handle pending VECTOR 2 but we erase the local_softirqs_pending state. So
when the workqueue runs, it doesn't see anymore VECTOR 1 pending and we lose
it.

So I need to remove the above condition and make the vector work
unconditionally execute the vector callback.

Now I can go to sleep...


> + struct softirq_action *sa = _vec[vec];
> +
> + kstat_incr_softirqs_this_cpu(vec);
> + trace_softirq_entry(vec);
> + sa->action(sa);
> + trace_softirq_exit(vec);
> + }


Re: [RFC PATCH 2/2] softirq: Per vector thread deferment

2018-01-11 Thread Frederic Weisbecker
On Fri, Jan 12, 2018 at 06:35:54AM +0100, Frederic Weisbecker wrote:
> Some softirq vectors can be more CPU hungry than others. Especially
> networking may sometimes deal with packet storm and need more CPU than
> IRQ tail can offer without inducing scheduler latencies. In this case
> the current code defers to ksoftirqd that behaves nicer. Now this nice
> behaviour can be bad for other IRQ vectors that usually need quick
> processing.
> 
> To solve this we only defer to threading the vectors that outreached the
> time limit on IRQ tail processing and leave the others inline on real
> Soft-IRQs service. This is achieved using workqueues with
> per-CPU/per-vector worklets.
> 
> Note ksoftirqd is not removed as it is still needed for threaded IRQs
> mode.
> 
> Suggested-by: Linus Torvalds 
> Signed-off-by: Frederic Weisbecker 
> Cc: Dmitry Safonov 
> Cc: Eric Dumazet 
> Cc: Linus Torvalds 
> Cc: Peter Zijlstra 
> Cc: Andrew Morton 
> Cc: David Miller 
> Cc: Hannes Frederic Sowa 
> Cc: Ingo Molnar 
> Cc: Levin Alexander 
> Cc: Paolo Abeni 
> Cc: Paul E. McKenney 
> Cc: Radu Rendec 
> Cc: Rik van Riel 
> Cc: Stanislaw Gruszka 
> Cc: Thomas Gleixner 
> Cc: Wanpeng Li 
> ---
>  kernel/softirq.c | 90 
> ++--
>  1 file changed, 87 insertions(+), 3 deletions(-)
> 
> diff --git a/kernel/softirq.c b/kernel/softirq.c
> index fa267f7..0c817ec6 100644
> --- a/kernel/softirq.c
> +++ b/kernel/softirq.c
> @@ -74,6 +74,13 @@ struct softirq_stat {
>  
>  static DEFINE_PER_CPU(struct softirq_stat, softirq_stat_cpu);
>  
> +struct vector_work {
> + int vec;
> + struct work_struct work;
> +};
> +
> +static DEFINE_PER_CPU(struct vector_work[NR_SOFTIRQS], vector_work_cpu);
> +
>  /*
>   * we cannot loop indefinitely here to avoid userspace starvation,
>   * but we also don't want to introduce a worst case 1/HZ latency
> @@ -251,6 +258,70 @@ static inline bool lockdep_softirq_start(void) { return 
> false; }
>  static inline void lockdep_softirq_end(bool in_hardirq) { }
>  #endif
>  
> +static void vector_work_func(struct work_struct *work)
> +{
> + struct vector_work *vector_work;
> + u32 pending;
> + int vec;
> +
> + vector_work = container_of(work, struct vector_work, work);
> + vec = vector_work->vec;
> +
> + local_irq_disable();
> + pending = local_softirq_pending();
> + account_irq_enter_time(current);
> + __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
> + lockdep_softirq_enter();
> + set_softirq_pending(pending & ~(1 << vec));
> + local_irq_enable();
> +
> + if (pending & (1 << vec)) {

Ah I see the problem. Say in do_softirq() we had pending VECTOR 1 and 2.
And we had overrun only VECTOR 1 so VECTOR 1 is enqueued to workqueue.
Right after that we go back to the restart loop in do_softirq in order to
handle pending VECTOR 2 but we erase the local_softirqs_pending state. So
when the workqueue runs, it doesn't see anymore VECTOR 1 pending and we lose
it.

So I need to remove the above condition and make the vector work
unconditionally execute the vector callback.

Now I can go to sleep...


> + struct softirq_action *sa = _vec[vec];
> +
> + kstat_incr_softirqs_this_cpu(vec);
> + trace_softirq_entry(vec);
> + sa->action(sa);
> + trace_softirq_exit(vec);
> + }