This patch extends the idea of NMI per-cpu buffers to regions
that may cause recursive printk() calls and possible deadlocks.
Namely, printk() can't handle printk calls from schedule code
or printk() calls from lock debugging code (spin_dump() for instance);
because those may be called with `sem->lock' already taken or any
other `critical' locks (p->pi_lock, etc.). An example of deadlock
can be

 vprintk_emit()
  console_unlock()
   up()                        << raw_spin_lock_irqsave(&sem->lock, flags);
    wake_up_process()
     try_to_wake_up()
      ttwu_queue()
       ttwu_activate()
        activate_task()
         enqueue_task()
          enqueue_task_fair()
           cfs_rq_of()
            task_of()
             WARN_ON_ONCE(!entity_is_task(se))
              vprintk_emit()
               console_trylock()
                down_trylock()
                 raw_spin_lock_irqsave(&sem->lock, flags)
                 ^^^^ deadlock

and some other cases.

Just like in NMI implementation, the solution uses a per-cpu
`printk_func' pointer to 'redirect' printk() calls to a 'safe'
callback, that store messages in a per-cpu buffer and flushes
them back to logbuf buffer later.

Usage example:

 printk()
  printk_safe_enter(flags)
  //
  //  any printk() call from here will endup in vprintk_safe(),
  //  that stores messages in a special per-CPU buffer.
  //
  printk_safe_exit(flags)

The 'redirection' mechanism, though, has been reworked, as suggested
by Petr Mladek. Instead of using a per-cpu @print_func callback we now
keep a per-cpu printk-context variable and call either default or nmi
vprintk function depending on its value. printk_nmi_entrer/exit and
printk_safe_enter/exit, thus, just set/celar corresponding bits in
printk-context functions.

The patch only adds printk_safe support, we don't use it yet.

Signed-off-by: Sergey Senozhatsky <sergey.senozhat...@gmail.com>
---
 include/linux/printk.h      |  21 +++++--
 kernel/printk/Makefile      |   2 +-
 kernel/printk/internal.h    |  56 +++++++++++--------
 kernel/printk/printk.c      |   3 -
 kernel/printk/printk_safe.c | 130 +++++++++++++++++++++++++++++++++++---------
 5 files changed, 153 insertions(+), 59 deletions(-)

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 67df41eca51c..f7d291cf61d9 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -147,17 +147,11 @@ void early_printk(const char *s, ...) { }
 #endif
 
 #ifdef CONFIG_PRINTK_NMI
-extern void printk_safe_init(void);
 extern void printk_safe_nmi_enter(void);
 extern void printk_safe_nmi_exit(void);
-extern void printk_safe_flush(void);
-extern void printk_safe_flush_on_panic(void);
 #else
-static inline void printk_safe_init(void) { }
 static inline void printk_safe_nmi_enter(void) { }
 static inline void printk_safe_nmi_exit(void) { }
-static inline void printk_safe_flush(void) { }
-static inline void printk_safe_flush_on_panic(void) { }
 #endif /* PRINTK_NMI */
 
 #ifdef CONFIG_PRINTK
@@ -209,6 +203,9 @@ void __init setup_log_buf(int early);
 __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...);
 void dump_stack_print_info(const char *log_lvl);
 void show_regs_print_info(const char *log_lvl);
+extern void printk_safe_init(void);
+extern void printk_safe_flush(void);
+extern void printk_safe_flush_on_panic(void);
 #else
 static inline __printf(1, 0)
 int vprintk(const char *s, va_list args)
@@ -268,6 +265,18 @@ static inline void dump_stack_print_info(const char 
*log_lvl)
 static inline void show_regs_print_info(const char *log_lvl)
 {
 }
+
+static inline void printk_safe_init(void)
+{
+}
+
+static inline void printk_safe_flush(void)
+{
+}
+
+static inline void printk_safe_flush_on_panic(void)
+{
+}
 #endif
 
 extern asmlinkage void dump_stack(void) __cold;
diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile
index 607928119f26..4a2ffc39eb95 100644
--- a/kernel/printk/Makefile
+++ b/kernel/printk/Makefile
@@ -1,3 +1,3 @@
 obj-y  = printk.o
-obj-$(CONFIG_PRINTK_NMI)               += printk_safe.o
+obj-$(CONFIG_PRINTK)   += printk_safe.o
 obj-$(CONFIG_A11Y_BRAILLE_CONSOLE)     += braille.o
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index 7fd2838fa417..97cee4f482f4 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -16,26 +16,8 @@
  */
 #include <linux/percpu.h>
 
-typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args);
-
-int __printf(1, 0) vprintk_default(const char *fmt, va_list args);
-
 #ifdef CONFIG_PRINTK_NMI
 
-extern raw_spinlock_t logbuf_lock;
-
-/*
- * printk() could not take logbuf_lock in NMI context. Instead,
- * it temporary stores the strings into a per-CPU buffer.
- * The alternative implementation is chosen transparently
- * via per-CPU variable.
- */
-DECLARE_PER_CPU(printk_func_t, printk_func);
-static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args)
-{
-       return this_cpu_read(printk_func)(fmt, args);
-}
-
 extern atomic_t nmi_message_lost;
 static inline int get_nmi_message_lost(void)
 {
@@ -44,14 +26,42 @@ static inline int get_nmi_message_lost(void)
 
 #else /* CONFIG_PRINTK_NMI */
 
-static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args)
-{
-       return vprintk_default(fmt, args);
-}
-
 static inline int get_nmi_message_lost(void)
 {
        return 0;
 }
 
 #endif /* CONFIG_PRINTK_NMI */
+
+#ifdef CONFIG_PRINTK
+
+#define PRINTK_SAFE_CONTEXT_MASK       0x7fffffff
+#define PRINTK_NMI_CONTEXT_MASK        0x80000000
+
+extern raw_spinlock_t logbuf_lock;
+
+__printf(1, 0) int vprintk_default(const char *fmt, va_list args);
+__printf(1, 0) int vprintk_func(const char *fmt, va_list args);
+void __printk_safe_enter(void);
+void __printk_safe_exit(void);
+
+#define printk_safe_enter(flags)               \
+       do {                                    \
+               local_irq_save(flags);          \
+               __printk_safe_enter();          \
+       } while (0)
+
+#define printk_safe_exit(flags)                \
+       do {                                    \
+               __printk_safe_exit();           \
+               local_irq_restore(flags);       \
+       } while (0)
+
+#else
+
+__printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; }
+
+#define printk_safe_enter(f)   ((void)(f))
+#define printk_safe_exit(f)    ((void)(f))
+
+#endif /* CONFIG_PRINTK */
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 265e815730f9..335b0fcb538e 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1902,9 +1902,6 @@ static size_t msg_print_text(const struct printk_log *msg,
                             bool syslog, char *buf, size_t size) { return 0; }
 static bool suppress_message_printing(int level) { return false; }
 
-/* Still needs to be defined for users */
-DEFINE_PER_CPU(printk_func_t, printk_func);
-
 #endif /* CONFIG_PRINTK */
 
 #ifdef CONFIG_EARLY_PRINTK
diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
index 09e5552f5b58..a041927a127b 100644
--- a/kernel/printk/printk_safe.c
+++ b/kernel/printk/printk_safe.c
@@ -1,5 +1,5 @@
 /*
- * printk_safe.c - Safe printk in NMI context
+ * printk_safe.c - Safe printk for printk-deadlock-prone contexts
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -32,15 +32,14 @@
  * is later flushed into the main ring buffer via IRQ work.
  *
  * The alternative implementation is chosen transparently
- * via @printk_func per-CPU variable.
+ * by examinig current printk() context mask stored in @printk_context
+ * per-CPU variable.
  *
  * The implementation allows to flush the strings also from another CPU.
  * There are situations when we want to make sure that all buffers
  * were handled or when IRQs are blocked.
  */
-DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default;
 static int printk_safe_irq_ready;
-atomic_t nmi_message_lost;
 
 #define SAFE_LOG_BUF_LEN ((1 << CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT) -    \
                         sizeof(atomic_t) - sizeof(struct irq_work))
@@ -50,18 +49,29 @@ struct printk_safe_seq_buf {
        struct irq_work         work;   /* IRQ work that flushes the buffer */
        unsigned char           buffer[SAFE_LOG_BUF_LEN];
 };
+
+static DEFINE_PER_CPU(struct printk_safe_seq_buf, safe_print_seq);
+static DEFINE_PER_CPU(int, printk_context);
+
+#ifdef CONFIG_PRINTK_NMI
 static DEFINE_PER_CPU(struct printk_safe_seq_buf, nmi_print_seq);
+atomic_t nmi_message_lost;
+#endif
 
 /*
- * Safe printk() for NMI context. It uses a per-CPU buffer to
- * store the message. NMIs are not nested, so there is always only
- * one writer running. But the buffer might get flushed from another
- * CPU, so we need to be careful.
+ * Add a message to per-CPU context-dependent buffer. NMI and printk-safe
+ * have dedicated buffers, because otherwise printk-safe preempted by
+ * NMI-printk would have overwritten the NMI messages.
+ *
+ * The messages are fushed from irq work (of from panic()), possibly,
+ * from other CPU, concurrently with printk_safe_log_store(). Should this
+ * happen, printk_safe_log_store() will notice the buffer->len mismatch
+ * and repeat the write.
  */
-static int vprintk_safe_nmi(const char *fmt, va_list args)
+static int printk_safe_log_store(struct printk_safe_seq_buf *s,
+                                const char *fmt, va_list args)
 {
-       struct printk_safe_seq_buf *s = this_cpu_ptr(&nmi_print_seq);
-       int add = 0;
+       int add;
        size_t len;
 
 again:
@@ -74,8 +84,8 @@ static int vprintk_safe_nmi(const char *fmt, va_list args)
        }
 
        /*
-        * Make sure that all old data have been read before the buffer was
-        * reseted. This is not needed when we just append data.
+        * Make sure that all old data have been read before the buffer
+        * was reset. This is not needed when we just append data.
         */
        if (!len)
                smp_rmb();
@@ -161,7 +171,7 @@ static int printk_safe_flush_buffer(const char *start, 
size_t len)
 }
 
 /*
- * Flush data from the associated per_CPU buffer. The function
+ * Flush data from the associated per-CPU buffer. The function
  * can be called either via IRQ work or independently.
  */
 static void __printk_safe_flush(struct irq_work *work)
@@ -231,8 +241,12 @@ void printk_safe_flush(void)
 {
        int cpu;
 
-       for_each_possible_cpu(cpu)
+       for_each_possible_cpu(cpu) {
+#ifdef CONFIG_PRINTK_NMI
                __printk_safe_flush(&per_cpu(nmi_print_seq, cpu).work);
+#endif
+               __printk_safe_flush(&per_cpu(safe_print_seq, cpu).work);
+       }
 }
 
 /**
@@ -262,14 +276,88 @@ void printk_safe_flush_on_panic(void)
        printk_safe_flush();
 }
 
+#ifdef CONFIG_PRINTK_NMI
+/*
+ * Safe printk() for NMI context. It uses a per-CPU buffer to
+ * store the message. NMIs are not nested, so there is always only
+ * one writer running. But the buffer might get flushed from another
+ * CPU, so we need to be careful.
+ */
+static int vprintk_safe_nmi(const char *fmt, va_list args)
+{
+       struct printk_safe_seq_buf *s = this_cpu_ptr(&nmi_print_seq);
+
+       return printk_safe_log_store(s, fmt, args);
+}
+
+void printk_safe_nmi_enter(void)
+{
+       this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK);
+}
+
+void printk_safe_nmi_exit(void)
+{
+       this_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK);
+}
+
+#else
+
+static int vprintk_safe_nmi(const char *fmt, va_list args)
+{
+       return 0;
+}
+
+#endif /* CONFIG_PRINTK_NMI */
+
+/*
+ * Lock-less printk(), to avoid deadlocks should the printk() recurse
+ * into itself. It uses a per-CPU buffer to store the message, just like
+ * NMI.
+ */
+static int vprintk_safe(const char *fmt, va_list args)
+{
+       struct printk_safe_seq_buf *s = this_cpu_ptr(&safe_print_seq);
+
+       return printk_safe_log_store(s, fmt, args);
+}
+
+/* Can be preempted by NMI. */
+void __printk_safe_enter(void)
+{
+       this_cpu_inc(printk_context);
+}
+
+/* Can be preempted by NMI. */
+void __printk_safe_exit(void)
+{
+       this_cpu_dec(printk_context);
+}
+
+__printf(1, 0) int vprintk_func(const char *fmt, va_list args)
+{
+       if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK)
+               return vprintk_safe_nmi(fmt, args);
+
+       if (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK)
+               return vprintk_safe(fmt, args);
+
+       return vprintk_default(fmt, args);
+}
+
 void __init printk_safe_init(void)
 {
        int cpu;
 
        for_each_possible_cpu(cpu) {
-               struct printk_safe_seq_buf *s = &per_cpu(nmi_print_seq, cpu);
+               struct printk_safe_seq_buf *s;
+
+               s = &per_cpu(safe_print_seq, cpu);
+               init_irq_work(&s->work, __printk_safe_flush);
 
+#ifdef CONFIG_PRINTK_NMI
+               s = &per_cpu(nmi_print_seq, cpu);
                init_irq_work(&s->work, __printk_safe_flush);
+#endif
        }
 
        /* Make sure that IRQ works are initialized before enabling. */
@@ -279,13 +367,3 @@ void __init printk_safe_init(void)
        /* Flush pending messages that did not have scheduled IRQ works. */
        printk_safe_flush();
 }
-
-void printk_safe_nmi_enter(void)
-{
-       this_cpu_write(printk_func, vprintk_safe_nmi);
-}
-
-void printk_safe_nmi_exit(void)
-{
-       this_cpu_write(printk_func, vprintk_default);
-}
-- 
2.11.0

Reply via email to