Ksoftirqd now only remains to implement threaded IRQs. Convert it to
use the existing per-vector workqueues to avoid code duplication.

Suggested-by: Linus Torvalds <torva...@linux-foundation.org>
Suggested-by: Paolo Abeni <pab...@redhat.com>
Signed-off-by: Frederic Weisbecker <frede...@kernel.org>
Cc: Dmitry Safonov <d...@arista.com>
Cc: Eric Dumazet <eduma...@google.com>
Cc: Linus Torvalds <torva...@linux-foundation.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Andrew Morton <a...@linux-foundation.org>
Cc: David Miller <da...@davemloft.net>
Cc: Hannes Frederic Sowa <han...@stressinduktion.org>
Cc: Ingo Molnar <mi...@kernel.org>
Cc: Levin Alexander <alexander.le...@verizon.com>
Cc: Paolo Abeni <pab...@redhat.com>
Cc: Paul E. McKenney <paul...@linux.vnet.ibm.com>
Cc: Radu Rendec <rren...@arista.com>
Cc: Rik van Riel <r...@redhat.com>
Cc: Stanislaw Gruszka <sgrus...@redhat.com>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: Wanpeng Li <wanpeng...@hotmail.com>
Cc: Mauro Carvalho Chehab <mche...@s-opensource.com>
---
 Documentation/RCU/stallwarn.txt |  4 +-
 include/linux/interrupt.h       |  7 ----
 kernel/sched/cputime.c          | 13 +++---
 kernel/sched/sched.h            |  4 +-
 kernel/softirq.c                | 87 +++++++++--------------------------------
 net/ipv4/tcp_output.c           |  4 +-
 6 files changed, 31 insertions(+), 88 deletions(-)

diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt
index a08f928..ea3a8de 100644
--- a/Documentation/RCU/stallwarn.txt
+++ b/Documentation/RCU/stallwarn.txt
@@ -17,8 +17,8 @@ o     A CPU looping in an RCU read-side critical section.
 o      A CPU looping with interrupts disabled.
 
 o      A CPU looping with preemption disabled.  This condition can
-       result in RCU-sched stalls and, if ksoftirqd is in use, RCU-bh
-       stalls.
+       result in RCU-sched stalls and, if softirq workqueue is in use,
+       RCU-bh stalls.
 
 o      A CPU looping with bottom halves disabled.  This condition can
        result in RCU-sched and RCU-bh stalls.
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 92d044d..680f620 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -507,13 +507,6 @@ extern void __raise_softirq_irqoff(unsigned int nr);
 extern void raise_softirq_irqoff(unsigned int nr);
 extern void raise_softirq(unsigned int nr);
 
-DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
-
-static inline struct task_struct *this_cpu_ksoftirqd(void)
-{
-       return this_cpu_read(ksoftirqd);
-}
-
 extern int softirq_serving_workqueue(void);
 
 /* Tasklets --- multithreaded analogue of BHs.
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 30f70e5..c5b8dbd 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -64,15 +64,14 @@ void irqtime_account_irq(struct task_struct *curr)
        irqtime->irq_start_time += delta;
 
        /*
-        * We do not account for softirq time from ksoftirqd here.
-        * We want to continue accounting softirq time to ksoftirqd thread
+        * We do not account for softirq time from workqueue here.
+        * We want to continue accounting softirq time to workqueue thread
         * in that case, so as not to confuse scheduler with a special task
         * that do not consume any time, but still wants to run.
         */
        if (hardirq_count())
                irqtime_account_delta(irqtime, delta, CPUTIME_IRQ);
-       else if (in_serving_softirq() && curr != this_cpu_ksoftirqd() &&
-                !softirq_serving_workqueue())
+       else if (in_serving_softirq() && !softirq_serving_workqueue())
                irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ);
 }
 EXPORT_SYMBOL_GPL(irqtime_account_irq);
@@ -376,11 +375,11 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
 
        cputime -= other;
 
-       if (this_cpu_ksoftirqd() == p || softirq_serving_workqueue()) {
+       if (softirq_serving_workqueue()) {
                /*
-                * ksoftirqd time do not get accounted in cpu_softirq_time.
+                * Softirq wq time does not get accounted in cpu_softirq_time.
                 * So, we have to handle it separately here.
-                * Also, p->stime needs to be updated for ksoftirqd.
+                * Also, p->stime needs to be updated for workqueue.
                 */
                account_system_index_time(p, cputime, CPUTIME_SOFTIRQ);
        } else if (user_tick) {
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b19552a2..5d481f1 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2061,8 +2061,8 @@ struct irqtime {
 DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
 
 /*
- * Returns the irqtime minus the softirq time computed by ksoftirqd.
- * Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime
+ * Returns the irqtime minus the softirq time computed by workqueue.
+ * Otherwise workqueue's sum_exec_runtime is subtracted its own runtime
  * and never move forward.
  */
 static inline u64 irq_time_read(int cpu)
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 441e654..b2a5384 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -55,8 +55,6 @@ EXPORT_SYMBOL(irq_stat);
 
 static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
 
-DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
-
 const char * const softirq_to_name[NR_SOFTIRQS] = {
        "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL",
        "TASKLET", "SCHED", "HRTIMER", "RCU"
@@ -78,32 +76,6 @@ struct softirq {
 static DEFINE_PER_CPU(struct softirq, softirq_cpu);
 
 /*
- * we cannot loop indefinitely here to avoid userspace starvation,
- * but we also don't want to introduce a worst case 1/HZ latency
- * to the pending events, so lets the scheduler to balance
- * the softirq load for us.
- */
-static void wakeup_softirqd(void)
-{
-       /* Interrupts are disabled: no need to stop preemption */
-       struct task_struct *tsk = __this_cpu_read(ksoftirqd);
-
-       if (tsk && tsk->state != TASK_RUNNING)
-               wake_up_process(tsk);
-}
-
-/*
- * If ksoftirqd is scheduled, we do not want to process pending softirqs
- * right now. Let ksoftirqd handle this at its own rate, to get fairness.
- */
-static bool ksoftirqd_running(void)
-{
-       struct task_struct *tsk = __this_cpu_read(ksoftirqd);
-
-       return tsk && (tsk->state == TASK_RUNNING);
-}
-
-/*
  * preempt_count and SOFTIRQ_OFFSET usage:
  * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
  *   softirq processing.
@@ -408,7 +380,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
 
 asmlinkage __visible void do_softirq(void)
 {
-       __u32 pending;
+       __u32 pending, pending_work;
        unsigned long flags;
 
        if (in_interrupt())
@@ -417,8 +389,9 @@ asmlinkage __visible void do_softirq(void)
        local_irq_save(flags);
 
        pending = local_softirq_pending();
+       pending_work = __this_cpu_read(softirq_cpu.pending_work_mask);
 
-       if (pending && !ksoftirqd_running())
+       if (pending & ~pending_work)
                do_softirq_own_stack();
 
        local_irq_restore(flags);
@@ -432,7 +405,7 @@ void irq_enter(void)
        rcu_irq_enter();
        if (is_idle_task(current) && !in_interrupt()) {
                /*
-                * Prevent raise_softirq from needlessly waking up ksoftirqd
+                * Prevent raise_softirq from needlessly waking up workqueue
                 * here, as softirq will be serviced on return from interrupt.
                 */
                local_bh_disable();
@@ -445,7 +418,15 @@ void irq_enter(void)
 
 static inline void invoke_softirq(void)
 {
-       if (ksoftirqd_running())
+       unsigned int pending_work, pending = local_softirq_pending();
+
+       if (!pending)
+               return;
+
+       pending_work = __this_cpu_read(softirq_cpu.pending_work_mask);
+       pending &= ~pending_work;
+
+       if (!pending)
                return;
 
        if (!force_irqthreads) {
@@ -465,7 +446,7 @@ static inline void invoke_softirq(void)
                do_softirq_own_stack();
 #endif
        } else {
-               wakeup_softirqd();
+               do_softirq_workqueue(pending);
        }
 }
 
@@ -494,7 +475,7 @@ void irq_exit(void)
 #endif
        account_irq_exit_time(current);
        preempt_count_sub(HARDIRQ_OFFSET);
-       if (!in_interrupt() && local_softirq_pending())
+       if (!in_interrupt())
                invoke_softirq();
 
        tick_irq_exit();
@@ -515,11 +496,11 @@ inline void raise_softirq_irqoff(unsigned int nr)
         * actually run the softirq once we return from
         * the irq or softirq.
         *
-        * Otherwise we wake up ksoftirqd to make sure we
+        * Otherwise we wake up workqueue to make sure we
         * schedule the softirq soon.
         */
        if (!in_interrupt())
-               wakeup_softirqd();
+               do_softirq_workqueue(BIT(nr));
 }
 
 void raise_softirq(unsigned int nr)
@@ -758,27 +739,6 @@ void __init softirq_init(void)
        open_softirq(HI_SOFTIRQ, tasklet_hi_action);
 }
 
-static int ksoftirqd_should_run(unsigned int cpu)
-{
-       return local_softirq_pending();
-}
-
-static void run_ksoftirqd(unsigned int cpu)
-{
-       local_irq_disable();
-       if (local_softirq_pending()) {
-               /*
-                * We can safely run softirq on inline stack, as we are not deep
-                * in the task stack here.
-                */
-               __do_softirq();
-               local_irq_enable();
-               cond_resched_rcu_qs();
-               return;
-       }
-       local_irq_enable();
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 /*
  * tasklet_kill_immediate is called to remove a tasklet which can already be
@@ -841,22 +801,13 @@ static int takeover_tasklets(unsigned int cpu)
 #define takeover_tasklets      NULL
 #endif /* CONFIG_HOTPLUG_CPU */
 
-static struct smp_hotplug_thread softirq_threads = {
-       .store                  = &ksoftirqd,
-       .thread_should_run      = ksoftirqd_should_run,
-       .thread_fn              = run_ksoftirqd,
-       .thread_comm            = "ksoftirqd/%u",
-};
-
-static __init int spawn_ksoftirqd(void)
+static __init int tasklet_set_takeover(void)
 {
        cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
                                  takeover_tasklets);
-       BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
-
        return 0;
 }
-early_initcall(spawn_ksoftirqd);
+early_initcall(tasklet_set_takeover);
 
 /*
  * [ These __weak aliases are kept in a separate compilation unit, so that
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b4e4160..3b4811e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -912,7 +912,7 @@ void tcp_wfree(struct sk_buff *skb)
         */
        WARN_ON(refcount_sub_and_test(skb->truesize - 1, &sk->sk_wmem_alloc));
 
-       /* If this softirq is serviced by ksoftirqd, we are likely under stress.
+       /* If this softirq is serviced by workqueue, we are likely under stress.
         * Wait until our queues (qdisc + devices) are drained.
         * This gives :
         * - less callbacks to tcp_write_xmit(), reducing stress (batches)
@@ -920,7 +920,7 @@ void tcp_wfree(struct sk_buff *skb)
         *   to migrate this flow (skb->ooo_okay will be eventually set)
         */
        if (refcount_read(&sk->sk_wmem_alloc) >= SKB_TRUESIZE(1) &&
-           (this_cpu_ksoftirqd() == current || softirq_serving_workqueue()))
+           softirq_serving_workqueue())
                goto out;
 
        for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
-- 
2.7.4

Reply via email to