On (03/15/16 11:03), Jan Kara wrote:
[..]
> > +   if (!sync_print) {
> > +           if (printk_thread && !in_panic) {
> > +                   /*
> > +                    * This will wakeup the printing kthread and offload
> > +                    * printing to a schedulable context.
> > +                    */
> > +                   __this_cpu_or(printk_pending,
> > +                                   PRINTK_PENDING_KTHREAD_OUTPUT);
> > +                   irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
> > +           } else if (in_sched) {
> > +                   /*
> > +                    * @in_sched messages may come too early, when we don't
> > +                    * yet have @printk_thread. We can't print deferred
> > +                    * messages directly, because this may deadlock, route
> > +                    * them via IRQ context.
> > +                    */
> > +                   __this_cpu_or(printk_pending,
> > +                                   PRINTK_PENDING_IRQ_OUTPUT);
> > +                   irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
> > +           } else {
> > +                   sync_print = true;
> > +           }
> > +   }
> 
> I'm a bit undecided whether we want to go through irq work even for the
> common case of !in_sched messages or whether we want to directly call
> wake_up() in that case. Maybe I'd do it like:

Thanks for the review. I like the change you suggested. I've just
updated the wake_up_klogd_work_func() check to '!printk_sync && printk_thread',
since the user can change `printk_sync' at runtime via sysfs
(/sys/module/printk/parameters/synchronous).
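
That is, as in the patch below:

	if (pending & PRINTK_PENDING_OUTPUT) {
		if (!printk_sync && printk_thread) {
			wake_up(&printing_wait);
		} else {
			/* if trylock fails, someone else is doing the printing */
			if (console_trylock())
				console_unlock();
		}
	}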


What I wanted to do with this "irq indirection" was to move wake_up() out
of vprintk_emit(), but I didn't think it through well enough, I guess.

So, __wake_up() takes spin_lock_irqsave(&q->lock, flags), which, with
spinlock debugging enabled, can recursively call printk(). Since we
wake_up() the printing kthread only after we have released logbuf_lock
and reset logbuf_cpu back to UINT_MAX, we have no chance of detecting
(and surviving) this recursion.

(Theoretical possibilities of having a printk() in __wake_up_common(),
or further down the call chain, are not a real concern.)
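
To illustrate, here is a simplified sketch of the previous revision's
ordering (not the exact code):

	/* tail of the async vprintk_emit() path, previous revision */
	logbuf_cpu = UINT_MAX;		/* recursion detection disarmed */
	raw_spin_unlock(&logbuf_lock);
	lockdep_on();
	local_irq_restore(flags);

	/*
	 * __wake_up() takes q->lock; with spinlock debugging enabled it
	 * can printk() recursively, and at this point we can no longer
	 * detect that.
	 */
	wake_up(&printing_wait);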

So I guess what I want to do now is to move part of this wake_up()
decision making under the logbuf_lock/logbuf_cpu protected section, so
that the recursion detection code will be able to catch it. We also need
to switch back to sync printk once a recursion has happened, just to shut
up the faulty wake_up() in case it was the root cause.


Something like this (*minimally tested so far*):

-- Move wake_up() and friends under the logbuf section, so we can detect
   printk() recursion coming from wake_up().

-- In the recursion_bug branch, switch to sync printk. We don't know why
   we recursed; maybe because of wake_up()->spin_lock(). Doing
   kthread_stop()->wake_up_process() can be unsafe, I guess, so just set
   `printk_sync' to true.


Signed-off-by: Jan Kara <[email protected]>
Signed-off-by: Sergey Senozhatsky <[email protected]>
---
 Documentation/kernel-parameters.txt |  10 ++
 kernel/printk/printk.c              | 202 +++++++++++++++++++++++++-----------
 2 files changed, 154 insertions(+), 58 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 1e58ae9..454999e 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3114,6 +3114,16 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
        printk.time=    Show timing data prefixed to each printk message line
                        Format: <bool>  (1/Y/y=enable, 0/N/n=disable)
 
+       printk.synchronous=
+                       By default, kernel messages are printed to the console
+                       asynchronously (except during early boot or when an oops
+                       is happening). That avoids stalling the kernel behind a
+                       slow serial console and thus avoids softlockups, interrupt
+                       timeouts, or userspace timing out during heavy printing.
+                       However, for debugging problems, printing messages to the
+                       console immediately may be desirable. This option enables
+                       such behavior.
+
        processor.max_cstate=   [HW,ACPI]
                        Limit processor to maximum C-state
                        max_cstate=9 overrides any DMI blacklist limit.
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index d5fd844..38baed1 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -46,6 +46,7 @@
 #include <linux/utsname.h>
 #include <linux/ctype.h>
 #include <linux/uio.h>
+#include <linux/kthread.h>
 
 #include <asm/uaccess.h>
 #include <asm-generic/sections.h>
@@ -284,6 +285,105 @@ static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
 static char *log_buf = __log_buf;
 static u32 log_buf_len = __LOG_BUF_LEN;
 
+/*
+ * When true, printing to console will happen synchronously unless someone else
+ * is already printing messages.
+ *
+ * The default value on UP systems is 'true'.
+ */
+static bool __read_mostly printk_sync = !IS_ENABLED(CONFIG_SMP);
+module_param_named(synchronous, printk_sync, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(synchronous, "make printing to console synchronous");
+
+/* Printing kthread for async vprintk_emit() */
+static struct task_struct *printk_thread;
+/* Wait for printing wakeups from async vprintk_emit() */
+static DECLARE_WAIT_QUEUE_HEAD(printing_wait);
+
+static int printing_func(void *data)
+{
+       while (1) {
+               DECLARE_WAITQUEUE(wait, current);
+               set_current_state(TASK_INTERRUPTIBLE);
+               add_wait_queue(&printing_wait, &wait);
+
+               schedule();
+               remove_wait_queue(&printing_wait, &wait);
+
+               console_lock();
+               console_unlock();
+       }
+
+       return 0;
+}
+
+static int __init init_printk_thread(void)
+{
+       if (printk_sync)
+               return 0;
+
+       printk_thread = kthread_run(printing_func, NULL, "printk");
+       BUG_ON(IS_ERR(printk_thread));
+       return 0;
+}
+late_initcall(init_printk_thread);
+
+/*
+ * Delayed printk version, for scheduler-internal messages:
+ */
+#define PRINTK_PENDING_WAKEUP  (1<<0)
+#define PRINTK_PENDING_OUTPUT  (1<<1)
+
+static DEFINE_PER_CPU(int, printk_pending);
+
+static void wake_up_klogd_work_func(struct irq_work *irq_work)
+{
+       int pending = __this_cpu_xchg(printk_pending, 0);
+
+       if (pending & PRINTK_PENDING_OUTPUT) {
+               if (!printk_sync && printk_thread) {
+                       wake_up(&printing_wait);
+               } else {
+                       /*
+                        * If trylock fails, someone else is doing
+                        * the printing
+                        */
+                       if (console_trylock())
+                               console_unlock();
+               }
+       }
+
+       if (pending & PRINTK_PENDING_WAKEUP)
+               wake_up_interruptible(&log_wait);
+}
+
+static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
+       .func = wake_up_klogd_work_func,
+       .flags = IRQ_WORK_LAZY,
+};
+
+void wake_up_klogd(void)
+{
+       preempt_disable();
+       if (waitqueue_active(&log_wait)) {
+               this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
+               irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
+       }
+       preempt_enable();
+}
+
+int printk_deferred(const char *fmt, ...)
+{
+       va_list args;
+       int r;
+
+       va_start(args, fmt);
+       r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args);
+       va_end(args);
+
+       return r;
+}
+
 /* Return log buffer address */
 char *log_buf_addr_get(void)
 {
@@ -1609,6 +1709,8 @@ asmlinkage int vprintk_emit(int facility, int level,
                            const char *dict, size_t dictlen,
                            const char *fmt, va_list args)
 {
+       /* cpu currently holding logbuf_lock in this function */
+       static unsigned int logbuf_cpu = UINT_MAX;
        static bool recursion_bug;
        static char textbuf[LOG_LINE_MAX];
        char *text = textbuf;
@@ -1618,12 +1720,17 @@ asmlinkage int vprintk_emit(int facility, int level,
        int this_cpu;
        int printed_len = 0;
        bool in_sched = false;
-       /* cpu currently holding logbuf_lock in this function */
-       static unsigned int logbuf_cpu = UINT_MAX;
+       bool in_panic = console_loglevel == CONSOLE_LOGLEVEL_MOTORMOUTH;
+       bool sync_print = printk_sync;
 
        if (level == LOGLEVEL_SCHED) {
                level = LOGLEVEL_DEFAULT;
+               /*
+                * Deferred sched messages must not be printed
+                * synchronously, regardless of @printk_sync or @in_panic.
+                */
                in_sched = true;
+               sync_print = false;
        }
 
        boot_delay_msec(level);
@@ -1660,6 +1767,7 @@ asmlinkage int vprintk_emit(int facility, int level,
                static const char recursion_msg[] =
                        "BUG: recent printk recursion!";
 
+               printk_sync = true;
                recursion_bug = false;
                /* emit KERN_CRIT message */
                printed_len += log_store(0, 2, LOG_PREFIX|LOG_NEWLINE, 0,
@@ -1749,13 +1857,45 @@ asmlinkage int vprintk_emit(int facility, int level,
                                                 dict, dictlen, text, text_len);
        }
 
+       /*
+        * By default we print messages to the console asynchronously so that
+        * the kernel doesn't stall behind a slow serial console. That can lead
+        * to softlockups, lost interrupts, or userspace timing out under heavy
+        * printing load.
+        *
+        * However, we resort to synchronous printing of messages during early
+        * boot, when synchronous printing was explicitly requested via the
+        * kernel parameter, or when console_verbose() was called to print
+        * everything during a panic / oops.
+        */
+       if (!sync_print) {
+               if (in_sched) {
+                       /*
+                        * @in_sched messages may come too early, when we don't
+                        * yet have @printk_thread. We can't print deferred
+                        * messages directly because this may deadlock; route
+                        * them via IRQ context.
+                        */
+                       __this_cpu_or(printk_pending,
+                                       PRINTK_PENDING_OUTPUT);
+                       irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
+               } else if (printk_thread && !in_panic) {
+                       /*
+                        * This will wake up the printing kthread and offload
+                        * printing to a schedulable context.
+                        */
+                       wake_up(&printing_wait);
+               } else {
+                       sync_print = true;
+               }
+       }
+
        logbuf_cpu = UINT_MAX;
        raw_spin_unlock(&logbuf_lock);
        lockdep_on();
        local_irq_restore(flags);
 
-       /* If called from the scheduler, we can not call up(). */
-       if (!in_sched) {
+       if (sync_print) {
                lockdep_off();
                /*
                 * Try to acquire and then immediately release the console
@@ -2724,60 +2864,6 @@ late_initcall(printk_late_init);
 
 #if defined CONFIG_PRINTK
 /*
- * Delayed printk version, for scheduler-internal messages:
- */
-#define PRINTK_PENDING_WAKEUP  0x01
-#define PRINTK_PENDING_OUTPUT  0x02
-
-static DEFINE_PER_CPU(int, printk_pending);
-
-static void wake_up_klogd_work_func(struct irq_work *irq_work)
-{
-       int pending = __this_cpu_xchg(printk_pending, 0);
-
-       if (pending & PRINTK_PENDING_OUTPUT) {
-               /* If trylock fails, someone else is doing the printing */
-               if (console_trylock())
-                       console_unlock();
-       }
-
-       if (pending & PRINTK_PENDING_WAKEUP)
-               wake_up_interruptible(&log_wait);
-}
-
-static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
-       .func = wake_up_klogd_work_func,
-       .flags = IRQ_WORK_LAZY,
-};
-
-void wake_up_klogd(void)
-{
-       preempt_disable();
-       if (waitqueue_active(&log_wait)) {
-               this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
-               irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
-       }
-       preempt_enable();
-}
-
-int printk_deferred(const char *fmt, ...)
-{
-       va_list args;
-       int r;
-
-       preempt_disable();
-       va_start(args, fmt);
-       r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args);
-       va_end(args);
-
-       __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT);
-       irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
-       preempt_enable();
-
-       return r;
-}
-
-/*
  * printk rate limiting, lifted from the networking subsystem.
  *
  * This enforces a rate limit: not more than 10 kernel messages
-- 
2.8.0.rc0
